author | Aurabindo Pillai <aurabindo.pillai@amd.com> | 2024-04-19 19:02:53 +0300
---|---|---
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-04-27 00:23:13 +0300
commit | 70839da6360500a82e4d5f78499284474cbed7c1 (patch) |
tree | a2edcc28ff06f22167cf6c7f86cba499b87c41fd /drivers/gpu |
parent | 59a0c03a50e7f058e599c313e6a129562147016d (diff) |
download | linux-70839da6360500a82e4d5f78499284474cbed7c1.tar.xz |
drm/amd/display: Add new DCN401 sources
Add initial support for DCN 4.0.1.
Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
103 files changed, 61642 insertions, 0 deletions
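Most of the diff below is the new DCN 4.0.1 clock manager (dcn401_clk_mgr.c). One piece of reasoning it carries in a comment is the hard-min request strategy: the SMU interface takes the requested clock in MHz and returns the clock it actually programmed, so the driver first floors the kHz request to MHz (to avoid needlessly jumping to a higher DPM level) and only retries with the ceiling if the SMU comes back lower than requested. Below is a minimal standalone sketch of that logic; `smu_set_hard_min_mhz()` and its 10 MHz stepping are hypothetical stand-ins for the real `dcn401_smu_set_hard_min_by_freq()` call, not the driver's implementation.

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for dcn401_smu_set_hard_min_by_freq(): takes a request
 * in MHz and returns the clock the SMU actually programmed, in kHz.  Mocked
 * here to snap down to 10 MHz DPM steps just so the example runs. */
static unsigned int smu_set_hard_min_mhz(uint16_t freq_mhz)
{
    return (freq_mhz / 10) * 10 * 1000;
}

static unsigned int khz_to_mhz_floor(unsigned int khz) { return khz / 1000; }
static unsigned int khz_to_mhz_ceil(unsigned int khz)  { return (khz + 999) / 1000; }

/* Floor-then-ceil strategy described in dcn401_set_hard_min_by_freq_optimized():
 * floor first so the request does not bump up a DPM level unnecessarily; if the
 * SMU then reports less than requested, retry with the ceiling. */
static unsigned int set_hard_min_optimized(unsigned int requested_khz)
{
    unsigned int actual_khz = smu_set_hard_min_mhz(khz_to_mhz_floor(requested_khz));

    if (actual_khz < requested_khz)
        actual_khz = smu_set_hard_min_mhz(khz_to_mhz_ceil(requested_khz));

    return actual_khz;
}

int main(void)
{
    printf("requested 653800 kHz -> %u kHz\n", set_hard_min_optimized(653800));
    return 0;
}
```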
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h
new file mode 100644
index 000000000000..0d2584437934
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef DALSMC_H
+#define DALSMC_H
+
+#define DALSMC_VERSION 0x1
+
+// SMU Response Codes:
+#define DALSMC_Result_OK 0x1
+#define DALSMC_Result_Failed 0xFF
+#define DALSMC_Result_UnknownCmd 0xFE
+#define DALSMC_Result_CmdRejectedPrereq 0xFD
+#define DALSMC_Result_CmdRejectedBusy 0xFC
+
+
+
+// Message Definitions:
+#define DALSMC_MSG_TestMessage 0x1
+#define DALSMC_MSG_GetSmuVersion 0x2
+#define DALSMC_MSG_GetDriverIfVersion 0x3
+#define DALSMC_MSG_GetMsgHeaderVersion 0x4
+#define DALSMC_MSG_SetDalDramAddrHigh 0x5
+#define DALSMC_MSG_SetDalDramAddrLow 0x6
+#define DALSMC_MSG_TransferTableSmu2Dram 0x7
+#define DALSMC_MSG_TransferTableDram2Smu 0x8
+#define DALSMC_MSG_SetHardMinByFreq 0x9
+#define DALSMC_MSG_SetHardMaxByFreq 0xA
+#define DALSMC_MSG_GetDpmFreqByIndex 0xB
+#define DALSMC_MSG_GetDcModeMaxDpmFreq 0xC
+#define DALSMC_MSG_SetMinDeepSleepDcfclk 0xD
+#define DALSMC_MSG_NumOfDisplays 0xE
+#define DALSMC_MSG_SetExternalClientDfCstateAllow 0xF
+#define DALSMC_MSG_BacoAudioD3PME 0x10
+#define DALSMC_MSG_SetFclkSwitchAllow 0x11
+#define DALSMC_MSG_SetCabForUclkPstate 0x12
+#define DALSMC_MSG_SetWorstCaseUclkLatency 0x13
+#define DALSMC_Message_Count 0x14
+
+typedef enum {
+	FCLK_SWITCH_DISALLOW,
+	FCLK_SWITCH_ALLOW,
+} FclkSwitchAllow_e;
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
new file mode 100644
index 000000000000..d146c35f6d60
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
@@ -0,0 +1,1006 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+ +#include "dccg.h" +#include "clk_mgr_internal.h" +#include "dcn401/dcn401_clk_mgr_smu_msg.h" +#include "dcn20/dcn20_clk_mgr.h" +#include "dce100/dce_clk_mgr.h" +#include "dcn31/dcn31_clk_mgr.h" +#include "dcn32/dcn32_clk_mgr.h" +#include "dcn401/dcn401_clk_mgr.h" +#include "reg_helper.h" +#include "core_types.h" +#include "dm_helpers.h" +#include "link.h" +#include "atomfirmware.h" + +#include "dcn401_smu14_driver_if.h" + +#include "dcn/dcn_4_1_0_offset.h" +#include "dcn/dcn_4_1_0_sh_mask.h" + +#include "dcn401/dcn401_clk_mgr.h" +#include "dml/dcn401/dcn401_fpu.h" + +#define mmCLK01_CLK0_CLK_PLL_REQ 0x16E37 +#define mmCLK01_CLK0_CLK0_DFS_CNTL 0x16E69 +#define mmCLK01_CLK0_CLK1_DFS_CNTL 0x16E6C +#define mmCLK01_CLK0_CLK2_DFS_CNTL 0x16E6F +#define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E72 +#define mmCLK01_CLK0_CLK4_DFS_CNTL 0x16E75 + +#define CLK0_CLK_PLL_REQ__FbMult_int_MASK 0x000001ffUL +#define CLK0_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000f000UL +#define CLK0_CLK_PLL_REQ__FbMult_frac_MASK 0xffff0000UL +#define CLK0_CLK_PLL_REQ__FbMult_int__SHIFT 0x00000000 +#define CLK0_CLK_PLL_REQ__PllSpineDiv__SHIFT 0x0000000c +#define CLK0_CLK_PLL_REQ__FbMult_frac__SHIFT 0x00000010 + +#undef FN +#define FN(reg_name, field_name) \ + clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name + +#define REG(reg) \ + (clk_mgr->regs->reg) + +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define CLK_SR_DCN401(reg_name, block, inst)\ + .reg_name = mm ## block ## _ ## reg_name + +static const struct clk_mgr_registers clk_mgr_regs_dcn401 = { + CLK_REG_LIST_DCN401() +}; + +static const struct clk_mgr_shift clk_mgr_shift_dcn401 = { + CLK_COMMON_MASK_SH_LIST_DCN401(__SHIFT) +}; + +static const struct clk_mgr_mask clk_mgr_mask_dcn401 = { + CLK_COMMON_MASK_SH_LIST_DCN401(_MASK) +}; + +static bool dcn401_is_ppclk_dpm_enabled(struct clk_mgr_internal *clk_mgr, PPCLK_e clk) +{ + bool ppclk_dpm_enabled = false; + + switch (clk) { + case PPCLK_SOCCLK: + ppclk_dpm_enabled = + clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_socclk_levels > 1; + break; + case PPCLK_UCLK: + ppclk_dpm_enabled = + clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_memclk_levels > 1; + break; + case PPCLK_FCLK: + ppclk_dpm_enabled = + clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels > 1; + break; + case PPCLK_DISPCLK: + ppclk_dpm_enabled = + clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels > 1; + break; + case PPCLK_DPPCLK: + ppclk_dpm_enabled = + clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dppclk_levels > 1; + break; + case PPCLK_DPREFCLK: + ppclk_dpm_enabled = false; + break; + case PPCLK_DCFCLK: + ppclk_dpm_enabled = + clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels > 1; + break; + case PPCLK_DTBCLK: + ppclk_dpm_enabled = + clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels > 1; + break; + default: + ppclk_dpm_enabled = false; + } + + ppclk_dpm_enabled &= clk_mgr->smu_present; + + return ppclk_dpm_enabled; +} + +/* Query SMU for all clock states for a particular clock */ +static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, unsigned int *entry_0, + unsigned int *num_levels) +{ + unsigned int i; + char *entry_i = (char *)entry_0; + + uint32_t ret = dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF); + + if (ret & (1 << 31)) + /* fine-grained, 
only min and max */ + *num_levels = 2; + else + /* discrete, a number of fixed states */ + /* will set num_levels to 0 on failure */ + *num_levels = ret & 0xFF; + + /* if the initial message failed, num_levels will be 0 */ + for (i = 0; i < *num_levels; i++) { + *((unsigned int *)entry_i) = (dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF); + entry_i += sizeof(clk_mgr->base.bw_params->clk_table.entries[0]); + } +} + +static void dcn401_build_wm_range_table(struct clk_mgr *clk_mgr) +{ + /* legacy */ + DC_FP_START(); + dcn401_build_wm_range_table_fpu(clk_mgr); + DC_FP_END(); + + if (clk_mgr->ctx->dc->debug.using_dml21) { + /* For min clocks use as reported by PM FW and report those as min */ + uint16_t min_uclk_mhz = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz; + uint16_t min_dcfclk_mhz = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + + /* Set A - Normal - default values */ + clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set B - Unused on dcn4 */ + clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid = false; + + /* Set 1A - Dummy P-State - P-State latency set to "dummy p-state" value */ + /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ + if (clk_mgr->ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.max_uclk = 0xFFFF; + } else { + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].valid = false; + } + + /* Set 1B - Unused on dcn4 */ + clk_mgr->bw_params->wm_table.nv_entries[WM_1B].valid = false; + } +} + +void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + unsigned int num_levels; + struct clk_limit_num_entries *num_entries_per_clk = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk; + unsigned int i; + + memset(&(clk_mgr_base->clks), 0, sizeof(struct dc_clocks)); + clk_mgr_base->clks.p_state_change_support = true; + clk_mgr_base->clks.prev_p_state_change_support = true; + clk_mgr_base->clks.fclk_prev_p_state_change_support = true; + clk_mgr->smu_present = false; + clk_mgr->dpm_present = false; + + if (!clk_mgr_base->bw_params) + return; + + if (!clk_mgr_base->force_smu_not_present && dcn30_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver)) + clk_mgr->smu_present = true; + + if (!clk_mgr->smu_present) + return; + + dcn30_smu_check_driver_if_version(clk_mgr); + dcn30_smu_check_msg_header_version(clk_mgr); + + /* DCFCLK */ + dcn401_init_single_clock(clk_mgr, PPCLK_DCFCLK, + &clk_mgr_base->bw_params->clk_table.entries[0].dcfclk_mhz, + 
&num_entries_per_clk->num_dcfclk_levels); + clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK); + + /* SOCCLK */ + dcn401_init_single_clock(clk_mgr, PPCLK_SOCCLK, + &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz, + &num_entries_per_clk->num_socclk_levels); + clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK); + + /* DTBCLK */ + if (!clk_mgr->base.ctx->dc->debug.disable_dtb_ref_clk_switch) { + dcn401_init_single_clock(clk_mgr, PPCLK_DTBCLK, + &clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz, + &num_entries_per_clk->num_dtbclk_levels); + clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = + dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK); + } + + /* DISPCLK */ + dcn401_init_single_clock(clk_mgr, PPCLK_DISPCLK, + &clk_mgr_base->bw_params->clk_table.entries[0].dispclk_mhz, + &num_entries_per_clk->num_dispclk_levels); + num_levels = num_entries_per_clk->num_dispclk_levels; + clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK); + + /* DPPCLK */ + dcn401_init_single_clock(clk_mgr, PPCLK_DPPCLK, + &clk_mgr_base->bw_params->clk_table.entries[0].dppclk_mhz, + &num_entries_per_clk->num_dppclk_levels); + num_levels = num_entries_per_clk->num_dppclk_levels; + + if (num_entries_per_clk->num_dcfclk_levels && + num_entries_per_clk->num_dtbclk_levels && + num_entries_per_clk->num_dispclk_levels) + clk_mgr->dpm_present = true; + + if (clk_mgr_base->ctx->dc->debug.min_disp_clk_khz) { + for (i = 0; i < num_levels; i++) + if (clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz + < khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz)) + clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz + = khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz); + } + + if (clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz) { + for (i = 0; i < num_levels; i++) + if (clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz + < khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz)) + clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz + = khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz); + } + + /* Get UCLK, update bounding box */ + clk_mgr_base->funcs->get_memclk_states_from_smu(clk_mgr_base); + + /* WM range table */ + dcn401_build_wm_range_table(clk_mgr_base); +} + +static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr, + struct dc_state *context, + int ref_dtbclk_khz) +{ + struct dccg *dccg = clk_mgr->dccg; + uint32_t tg_mask = 0; + int i; + + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + /* use mask to program DTO once per tg */ + if (pipe_ctx->stream_res.tg && + !(tg_mask & (1 << pipe_ctx->stream_res.tg->inst))) { + tg_mask |= (1 << pipe_ctx->stream_res.tg->inst); + + if (dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { + pipe_ctx->clock_source->funcs->program_pix_clk( + pipe_ctx->clock_source, + &pipe_ctx->stream_res.pix_clk_params, + dccg->ctx->dc->link_srv->dp_get_encoding_format(&pipe_ctx->link_config.dp_link_settings), + &pipe_ctx->pll_settings); + } + + } + } +} + +void dcn401_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, + struct dc_state *context, bool safe_to_lower, int dppclk_khz) +{ + int i; + + clk_mgr->dccg->ref_dppclk = dppclk_khz; + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { + int dpp_inst 
= 0, dppclk_khz, prev_dppclk_khz; + + dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; + + if (context->res_ctx.pipe_ctx[i].plane_res.dpp) + dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst; + else if (!context->res_ctx.pipe_ctx[i].plane_res.dpp && dppclk_khz == 0) { + /* dpp == NULL && dppclk_khz == 0 is valid because of pipe harvesting. + * In this case just continue in loop + */ + continue; + } else if (!context->res_ctx.pipe_ctx[i].plane_res.dpp && dppclk_khz > 0) { + /* The software state is not valid if dpp resource is NULL and + * dppclk_khz > 0. + */ + ASSERT(false); + continue; + } + + prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i]; + + if (safe_to_lower || prev_dppclk_khz < dppclk_khz) + clk_mgr->dccg->funcs->update_dpp_dto( + clk_mgr->dccg, dpp_inst, dppclk_khz); + } +} + +static int dcn401_set_hard_min_by_freq_optimized(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, int requested_clk_khz) +{ + if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, clk)) + return 0; + + /* + * SMU set hard min interface takes requested clock in mhz and return + * actual clock configured in khz. If we floor requested clk to mhz, + * there is a chance that the actual clock configured in khz is less + * than requested. If we ceil it to mhz, there is a chance that it + * unnecessarily dumps up to a higher dpm level, which burns more power. + * The solution is to set by flooring it to mhz first. If the actual + * clock returned is less than requested, then we will ceil the + * requested value to mhz and call it again. + */ + int actual_clk_khz = dcn401_smu_set_hard_min_by_freq(clk_mgr, clk, khz_to_mhz_floor(requested_clk_khz)); + + if (actual_clk_khz < requested_clk_khz) + actual_clk_khz = dcn401_smu_set_hard_min_by_freq(clk_mgr, clk, khz_to_mhz_ceil(requested_clk_khz)); + + return actual_clk_khz; +} + +static void dcn401_update_clocks_update_dentist( + struct clk_mgr_internal *clk_mgr, + struct dc_state *context) +{ + uint32_t new_disp_divider = 0; + uint32_t new_dispclk_wdivider = 0; + uint32_t old_dispclk_wdivider = 0; + uint32_t i; + uint32_t dentist_dispclk_wdivider_readback = 0; + struct dc *dc = clk_mgr->base.ctx->dc; + + if (clk_mgr->base.clks.dispclk_khz == 0) + return; + + new_disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz; + + new_dispclk_wdivider = dentist_get_did_from_divider(new_disp_divider); + REG_GET(DENTIST_DISPCLK_CNTL, + DENTIST_DISPCLK_WDIVIDER, &old_dispclk_wdivider); + + /* When changing divider to or from 127, some extra programming is required to prevent corruption */ + if (old_dispclk_wdivider == 127 && new_dispclk_wdivider != 127) { + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + uint32_t fifo_level; + struct dccg *dccg = clk_mgr->base.ctx->dc->res_pool->dccg; + struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; + int32_t N; + int32_t j; + + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) + continue; + /* Virtual encoders don't have this function */ + if (!stream_enc->funcs->get_fifo_cal_average_level) + continue; + fifo_level = stream_enc->funcs->get_fifo_cal_average_level( + stream_enc); + N = fifo_level / 4; + dccg->funcs->set_fifo_errdet_ovr_en( + dccg, + true); + for (j = 0; j < N - 4; j++) + dccg->funcs->otg_drop_pixel( + dccg, + pipe_ctx->stream_res.tg->inst); + dccg->funcs->set_fifo_errdet_ovr_en( + dccg, + false); + } + } else if 
(new_dispclk_wdivider == 127 && old_dispclk_wdivider != 127) { + /* request clock with 126 divider first */ + uint32_t temp_disp_divider = dentist_get_divider_from_did(126); + uint32_t temp_dispclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR * clk_mgr->base.dentist_vco_freq_khz) / temp_disp_divider; + + if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK)) + dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DISPCLK, + temp_dispclk_khz); + + if (dc->debug.override_dispclk_programming) { + REG_GET(DENTIST_DISPCLK_CNTL, + DENTIST_DISPCLK_WDIVIDER, &dentist_dispclk_wdivider_readback); + + if (dentist_dispclk_wdivider_readback != 126) { + REG_UPDATE(DENTIST_DISPCLK_CNTL, + DENTIST_DISPCLK_WDIVIDER, 126); + REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000); + } + } + + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct dccg *dccg = clk_mgr->base.ctx->dc->res_pool->dccg; + struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; + uint32_t fifo_level; + int32_t N; + int32_t j; + + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) + continue; + /* Virtual encoders don't have this function */ + if (!stream_enc->funcs->get_fifo_cal_average_level) + continue; + fifo_level = stream_enc->funcs->get_fifo_cal_average_level( + stream_enc); + N = fifo_level / 4; + dccg->funcs->set_fifo_errdet_ovr_en(dccg, true); + for (j = 0; j < 12 - N; j++) + dccg->funcs->otg_add_pixel(dccg, + pipe_ctx->stream_res.tg->inst); + dccg->funcs->set_fifo_errdet_ovr_en(dccg, false); + } + } + + /* do requested DISPCLK updates*/ + if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK)) + dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DISPCLK, + clk_mgr->base.clks.dispclk_khz); + + if (dc->debug.override_dispclk_programming) { + REG_GET(DENTIST_DISPCLK_CNTL, + DENTIST_DISPCLK_WDIVIDER, &dentist_dispclk_wdivider_readback); + + if (dentist_dispclk_wdivider_readback > new_dispclk_wdivider) { + REG_UPDATE(DENTIST_DISPCLK_CNTL, + DENTIST_DISPCLK_WDIVIDER, new_dispclk_wdivider); + REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000); + } + } + +} + +static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, + struct dc_state *context, + bool safe_to_lower) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; + struct dc *dc = clk_mgr_base->ctx->dc; + int display_count; + bool update_dppclk = false; + bool update_dispclk = false; + bool enter_display_off = false; + bool dpp_clock_lowered = false; + struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; + bool force_reset = false; + bool update_uclk = false, update_fclk = false; + bool p_state_change_support; + bool fclk_p_state_change_support; + int total_plane_count; + + if (dc->work_arounds.skip_clock_update) + return; + + if (clk_mgr_base->clks.dispclk_khz == 0 || + (dc->debug.force_clock_mode & 0x1)) { + /* This is from resume or boot up, if forced_clock cfg option used, + * we bypass program dispclk and DPPCLK, but need set them for S3. + */ + force_reset = true; + + dcn2_read_clocks_from_hw_dentist(clk_mgr_base); + + /* Force_clock_mode 0x1: force reset the clock even it is the same clock + * as long as it is in Passive level. 
+ */ + } + display_count = clk_mgr_helper_get_active_display_cnt(dc, context); + + if (display_count == 0) + enter_display_off = true; + + if (clk_mgr->smu_present) { + if (enter_display_off == safe_to_lower) + dcn30_smu_set_num_of_displays(clk_mgr, display_count); + + clk_mgr_base->clks.fclk_prev_p_state_change_support = clk_mgr_base->clks.fclk_p_state_change_support; + + total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context); + fclk_p_state_change_support = new_clocks->fclk_p_state_change_support || (total_plane_count == 0); + + if (should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_p_state_change_support)) { + clk_mgr_base->clks.fclk_p_state_change_support = fclk_p_state_change_support; + + /* To enable FCLK P-state switching, send FCLK_PSTATE_SUPPORTED message to PMFW */ + if (clk_mgr_base->clks.fclk_p_state_change_support) { + /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */ + dcn401_smu_send_fclk_pstate_message(clk_mgr, FCLK_PSTATE_SUPPORTED); + } + } + + if (dc->debug.force_min_dcfclk_mhz > 0) + new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ? + new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000); + + if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) { + clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz; + if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DCFCLK)) + dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DCFCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_khz)); + } + + if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) { + clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz; + if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DCFCLK)) + dcn30_smu_set_min_deep_sleep_dcef_clk(clk_mgr, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_deep_sleep_khz)); + } + + if (should_set_clock(safe_to_lower, new_clocks->socclk_khz, clk_mgr_base->clks.socclk_khz)) + /* We don't actually care about socclk, don't notify SMU of hard min */ + clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz; + + clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; + clk_mgr_base->clks.prev_num_ways = clk_mgr_base->clks.num_ways; + + if (clk_mgr_base->clks.num_ways != new_clocks->num_ways && + clk_mgr_base->clks.num_ways < new_clocks->num_ways) { + clk_mgr_base->clks.num_ways = new_clocks->num_ways; + if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) + dcn401_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways); + } + + + p_state_change_support = new_clocks->p_state_change_support || (total_plane_count == 0); + if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { + clk_mgr_base->clks.p_state_change_support = p_state_change_support; + clk_mgr_base->clks.fw_based_mclk_switching = p_state_change_support && new_clocks->fw_based_mclk_switching; + + /* to disable P-State switching, set UCLK min = max */ + if (!clk_mgr_base->clks.p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) + dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1].memclk_mhz); + } + + /* Always update saved value, even if new value not set due to P-State switching unsupported. 
Also check safe_to_lower for FCLK */ + if (safe_to_lower && (clk_mgr_base->clks.fclk_p_state_change_support != clk_mgr_base->clks.fclk_prev_p_state_change_support)) { + update_fclk = true; + } + + if (!clk_mgr_base->clks.fclk_p_state_change_support && + update_fclk && + dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_FCLK)) { + /* Handle code for sending a message to PMFW that FCLK P-state change is not supported */ + dcn401_smu_send_fclk_pstate_message(clk_mgr, FCLK_PSTATE_NOTSUPPORTED); + } + + /* Always update saved value, even if new value not set due to P-State switching unsupported */ + if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) { + clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz; + update_uclk = true; + } + + /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */ + if (clk_mgr_base->clks.p_state_change_support && + (update_uclk || !clk_mgr_base->clks.prev_p_state_change_support) && + dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) + dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); + + if (clk_mgr_base->clks.num_ways != new_clocks->num_ways && + clk_mgr_base->clks.num_ways > new_clocks->num_ways) { + clk_mgr_base->clks.num_ways = new_clocks->num_ways; + if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) + dcn401_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways); + } + } + + if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) { + if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz) + dpp_clock_lowered = true; + + clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz; + clk_mgr_base->clks.actual_dppclk_khz = new_clocks->dppclk_khz; + + if (clk_mgr->smu_present && !dpp_clock_lowered && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK)) + clk_mgr_base->clks.actual_dppclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DPPCLK, clk_mgr_base->clks.dppclk_khz); + update_dppclk = true; + } + + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { + clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; + + update_dispclk = true; + } + + if (!new_clocks->dtbclk_en && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK)) { + new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; + } + + /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */ + if (!dc->debug.disable_dtb_ref_clk_switch && + should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000) && + dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK)) { + /* DCCG requires KHz precision for DTBCLK */ + clk_mgr_base->clks.ref_dtbclk_khz = + dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz)); + + dcn401_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); + } + + if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) { + if (dpp_clock_lowered) { + /* if clock is being lowered, increase DTO before lowering refclk */ + dcn401_update_clocks_update_dpp_dto(clk_mgr, context, + safe_to_lower, clk_mgr_base->clks.dppclk_khz); + dcn401_update_clocks_update_dentist(clk_mgr, context); + if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK)) { + clk_mgr_base->clks.actual_dppclk_khz = 
dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DPPCLK, + clk_mgr_base->clks.dppclk_khz); + dcn401_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower, + clk_mgr_base->clks.actual_dppclk_khz); + } + + } else { + /* if clock is being raised, increase refclk before lowering DTO */ + if (update_dppclk || update_dispclk) + dcn401_update_clocks_update_dentist(clk_mgr, context); + /* There is a check inside dcn20_update_clocks_update_dpp_dto which ensures + * that we do not lower dto when it is not safe to lower. We do not need to + * compare the current and new dppclk before calling this function. + */ + dcn401_update_clocks_update_dpp_dto(clk_mgr, context, + safe_to_lower, clk_mgr_base->clks.actual_dppclk_khz); + } + } + + if (update_dispclk && dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) + /*update dmcu for wait_loop count*/ + dmcu->funcs->set_psr_wait_loop(dmcu, + clk_mgr_base->clks.dispclk_khz / 1000 / 7); +} + +static uint32_t dcn401_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) +{ + struct fixed31_32 pll_req; + uint32_t pll_req_reg = 0; + + /* get FbMult value */ + pll_req_reg = REG_READ(CLK0_CLK_PLL_REQ); + + /* set up a fixed-point number + * this works because the int part is on the right edge of the register + * and the frac part is on the left edge + */ + pll_req = dc_fixpt_from_int(pll_req_reg & clk_mgr->clk_mgr_mask->FbMult_int); + pll_req.value |= pll_req_reg & clk_mgr->clk_mgr_mask->FbMult_frac; + + /* multiply by REFCLK period */ + pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz); + + return dc_fixpt_floor(pll_req); +} + +static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + uint32_t dprefclk_did = 0; + uint32_t dcfclk_did = 0; + uint32_t dtbclk_did = 0; + uint32_t dispclk_did = 0; + uint32_t dppclk_did = 0; + uint32_t target_div = 0; + + /* DFS Slice 0 is used for DISPCLK */ + dispclk_did = REG_READ(CLK0_CLK0_DFS_CNTL); + /* DFS Slice 1 is used for DPPCLK */ + dppclk_did = REG_READ(CLK0_CLK1_DFS_CNTL); + /* DFS Slice 2 is used for DPREFCLK */ + dprefclk_did = REG_READ(CLK0_CLK2_DFS_CNTL); + /* DFS Slice 3 is used for DCFCLK */ + dcfclk_did = REG_READ(CLK0_CLK3_DFS_CNTL); + /* DFS Slice 4 is used for DTBCLK */ + dtbclk_did = REG_READ(CLK0_CLK4_DFS_CNTL); + + /* Convert DISPCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dispclk_did); + //Get dispclk in khz + regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DISPCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dppclk_did); + //Get dppclk in khz + regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DPREFCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dprefclk_did); + //Get dprefclk in khz + regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DCFCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dcfclk_did); + //Get dcfclk in khz + regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DTBCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dtbclk_did); + //Get dtbclk in khz 
+ regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; +} + +static void dcn401_clock_read_ss_info(struct clk_mgr_internal *clk_mgr) +{ + struct dc_bios *bp = clk_mgr->base.ctx->dc_bios; + int ss_info_num = bp->funcs->get_ss_entry_number( + bp, AS_SIGNAL_TYPE_GPU_PLL); + + if (ss_info_num) { + struct spread_spectrum_info info = { { 0 } }; + enum bp_result result = bp->funcs->get_spread_spectrum_info( + bp, AS_SIGNAL_TYPE_GPU_PLL, 0, &info); + + /* SSInfo.spreadSpectrumPercentage !=0 would be sign + * that SS is enabled + */ + if (result == BP_RESULT_OK && + info.spread_spectrum_percentage != 0) { + clk_mgr->ss_on_dprefclk = true; + clk_mgr->dprefclk_ss_divider = info.spread_percentage_divider; + + if (info.type.CENTER_MODE == 0) { + /* Currently for DP Reference clock we + * need only SS percentage for + * downspread + */ + clk_mgr->dprefclk_ss_percentage = + info.spread_spectrum_percentage; + } + } + } +} +static void dcn401_notify_wm_ranges(struct clk_mgr *clk_mgr_base) +{ + unsigned int i; + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + WatermarksExternal_t *table = (WatermarksExternal_t *) clk_mgr->wm_range_table; + + if (!clk_mgr->smu_present) + return; + + if (!table) + return; + + memset(table, 0, sizeof(*table)); + + /* collect valid ranges, place in pmfw table */ + for (i = 0; i < WM_SET_COUNT; i++) + if (clk_mgr->base.bw_params->wm_table.nv_entries[i].valid) { + table->Watermarks.WatermarkRow[i].WmSetting = i; + table->Watermarks.WatermarkRow[i].Flags = clk_mgr->base.bw_params->wm_table.nv_entries[i].pmfw_breakdown.wm_type; + } + dcn30_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32); + dcn30_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF); + dcn401_smu_transfer_wm_table_dram_2_smu(clk_mgr); +} + +/* Set min memclk to minimum, either constrained by the current mode or DPM0 */ +static void dcn401_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool current_mode) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) + return; + + if (current_mode) { + if (clk_mgr_base->clks.p_state_change_support) + dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); + else + dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + clk_mgr_base->bw_params->max_memclk_mhz); + } else { + dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz); + } +} + +/* Set max memclk to highest DPM value */ +static void dcn401_set_hard_max_memclk(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) + return; + + dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, + clk_mgr_base->bw_params->max_memclk_mhz); +} + +/* Get current memclk states, update bounding box */ +static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct clk_limit_num_entries *num_entries_per_clk = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk; + unsigned int num_levels; + + if (!clk_mgr->smu_present) + return; + + /* Refresh memclk and fclk states */ + dcn401_init_single_clock(clk_mgr, PPCLK_UCLK, + &clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz, + 
&num_entries_per_clk->num_memclk_levels); + if (num_entries_per_clk->num_memclk_levels) { + clk_mgr_base->bw_params->max_memclk_mhz = + clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz; + } + + clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK); + clk_mgr_base->bw_params->dc_mode_softmax_memclk = clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz; + + dcn401_init_single_clock(clk_mgr, PPCLK_FCLK, + &clk_mgr_base->bw_params->clk_table.entries[0].fclk_mhz, + &num_entries_per_clk->num_fclk_levels); + clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK); + + if (num_entries_per_clk->num_memclk_levels >= num_entries_per_clk->num_fclk_levels) { + num_levels = num_entries_per_clk->num_memclk_levels; + } else { + num_levels = num_entries_per_clk->num_fclk_levels; + } + + clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1; + + if (clk_mgr->dpm_present && !num_levels) + clk_mgr->dpm_present = false; + + /* Refresh bounding box */ + clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box( + clk_mgr->base.ctx->dc, clk_mgr_base->bw_params); +} + +static bool dcn401_are_clock_states_equal(struct dc_clocks *a, + struct dc_clocks *b) +{ + if (a->dispclk_khz != b->dispclk_khz) + return false; + else if (a->dppclk_khz != b->dppclk_khz) + return false; + else if (a->dcfclk_khz != b->dcfclk_khz) + return false; + else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz) + return false; + else if (a->dramclk_khz != b->dramclk_khz) + return false; + else if (a->p_state_change_support != b->p_state_change_support) + return false; + else if (a->fclk_p_state_change_support != b->fclk_p_state_change_support) + return false; + + return true; +} + +static void dcn401_enable_pme_wa(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + if (!clk_mgr->smu_present) + return; + + dcn401_smu_set_pme_workaround(clk_mgr); +} + +static bool dcn401_is_smu_present(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + return clk_mgr->smu_present; +} + + +static int dcn401_get_dtb_ref_freq_khz(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + int dtb_ref_clk_khz = 0; + + if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK)) { + /* DPM enabled, use currently set value */ + dtb_ref_clk_khz = clk_mgr_base->clks.ref_dtbclk_khz; + } else { + /* DPM disabled, so use boot snapshot */ + dtb_ref_clk_khz = clk_mgr_base->boot_snapshot.dtbclk; + } + + return dtb_ref_clk_khz; +} + +static struct clk_mgr_funcs dcn401_funcs = { + .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, + .get_dtb_ref_clk_frequency = dcn401_get_dtb_ref_freq_khz, + .update_clocks = dcn401_update_clocks, + .dump_clk_registers = dcn401_dump_clk_registers, + .init_clocks = dcn401_init_clocks, + .notify_wm_ranges = dcn401_notify_wm_ranges, + .set_hard_min_memclk = dcn401_set_hard_min_memclk, + .set_hard_max_memclk = dcn401_set_hard_max_memclk, + .get_memclk_states_from_smu = dcn401_get_memclk_states_from_smu, + .are_clock_states_equal = dcn401_are_clock_states_equal, + .enable_pme_wa = dcn401_enable_pme_wa, + .is_smu_present = dcn401_is_smu_present, +}; + +void dcn401_clk_mgr_construct( + struct dc_context *ctx, + struct clk_mgr_internal *clk_mgr, + struct pp_smu_funcs *pp_smu, + struct 
dccg *dccg) +{ + struct clk_log_info log_info = {0}; + + clk_mgr->base.ctx = ctx; + clk_mgr->base.funcs = &dcn401_funcs; + clk_mgr->regs = &clk_mgr_regs_dcn401; + clk_mgr->clk_mgr_shift = &clk_mgr_shift_dcn401; + clk_mgr->clk_mgr_mask = &clk_mgr_mask_dcn401; + + clk_mgr->dccg = dccg; + clk_mgr->dfs_bypass_disp_clk = 0; + + clk_mgr->dprefclk_ss_percentage = 0; + clk_mgr->dprefclk_ss_divider = 1000; + clk_mgr->ss_on_dprefclk = false; + clk_mgr->dfs_ref_freq_khz = 100000; + + /* Changed from DCN3.2_clock_frequency doc to match + * dcn401_dump_clk_registers from 4 * dentist_vco_freq_khz / + * dprefclk DID divider + */ + clk_mgr->base.dprefclk_khz = 720000; //TODO update from VBIOS + + /* integer part is now VCO frequency in kHz */ + clk_mgr->base.dentist_vco_freq_khz = dcn401_get_vco_frequency_from_reg(clk_mgr); + + /* in case we don't get a value from the register, use default */ + if (clk_mgr->base.dentist_vco_freq_khz == 0) + clk_mgr->base.dentist_vco_freq_khz = 4500000; //TODO Update from VBIOS + + dcn401_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info); + + if (ctx->dc->debug.disable_dtb_ref_clk_switch && + clk_mgr->base.clks.ref_dtbclk_khz != clk_mgr->base.boot_snapshot.dtbclk) { + clk_mgr->base.clks.ref_dtbclk_khz = clk_mgr->base.boot_snapshot.dtbclk; + } + + if (clk_mgr->base.boot_snapshot.dprefclk != 0) { + clk_mgr->base.dprefclk_khz = clk_mgr->base.boot_snapshot.dprefclk; + } + dcn401_clock_read_ss_info(clk_mgr); + + clk_mgr->dfs_bypass_enabled = false; + + clk_mgr->smu_present = false; + + clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL); + + /* need physical address of table to give to PMFW */ + clk_mgr->wm_range_table = dm_helpers_allocate_gpu_mem(clk_mgr->base.ctx, + DC_MEM_ALLOC_TYPE_GART, sizeof(WatermarksExternal_t), + &clk_mgr->wm_range_table_addr); +} + +void dcn401_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr) +{ + kfree(clk_mgr->base.bw_params); + + if (clk_mgr->wm_range_table) + dm_helpers_free_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART, + clk_mgr->wm_range_table); +} + diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h new file mode 100644 index 000000000000..496540ec1950 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DCN401_CLK_MGR_H_ +#define __DCN401_CLK_MGR_H_ + +void dcn401_init_clocks(struct clk_mgr *clk_mgr_base); + +void dcn401_clk_mgr_construct(struct dc_context *ctx, + struct clk_mgr_internal *clk_mgr, + struct pp_smu_funcs *pp_smu, + struct dccg *dccg); + +void dcn401_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, + struct dc_state *context, bool safe_to_lower, int dppclk_khz); + +void dcn401_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr); + + + +#endif /* __DCN401_CLK_MGR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c new file mode 100644 index 000000000000..054e8bd686f1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#include "dcn401_clk_mgr_smu_msg.h" + +#include "clk_mgr_internal.h" +#include "reg_helper.h" + +#include "dalsmc.h" +#include "dcn401_smu14_driver_if.h" + +#define mmDAL_MSG_REG 0x1628A +#define mmDAL_ARG_REG 0x16273 +#define mmDAL_RESP_REG 0x16274 + +#define REG(reg_name) \ + mm ## reg_name + +#include "logger_types.h" + +#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); } + +/* + * Function to be used instead of REG_WAIT macro because the wait ends when + * the register is NOT EQUAL to zero, and because the translation in msg_if.h + * won't work with REG_WAIT. + */ +static uint32_t dcn401_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries) +{ + uint32_t reg = 0; + + do { + reg = REG_READ(DAL_RESP_REG); + if (reg) + break; + + if (delay_us >= 1000) + msleep(delay_us/1000); + else if (delay_us > 0) + udelay(delay_us); + } while (max_retries--); + + return reg; +} + +static bool dcn401_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint32_t msg_id, uint32_t param_in, uint32_t *param_out) +{ + /* Wait for response register to be ready */ + dcn401_smu_wait_for_response(clk_mgr, 10, 200000); + + /* Clear response register */ + REG_WRITE(DAL_RESP_REG, 0); + + /* Set the parameter register for the SMU message */ + REG_WRITE(DAL_ARG_REG, param_in); + + /* Trigger the message transaction by writing the message ID */ + REG_WRITE(DAL_MSG_REG, msg_id); + + /* Wait for response */ + if (dcn401_smu_wait_for_response(clk_mgr, 10, 200000) == DALSMC_Result_OK) { + if (param_out) + *param_out = REG_READ(DAL_ARG_REG); + + return true; + } + + return false; +} + +void dcn401_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool enable) +{ + smu_print("FCLK P-state support value is : %d\n", enable); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_SetFclkSwitchAllow, enable ? 
FCLK_PSTATE_SUPPORTED : FCLK_PSTATE_NOTSUPPORTED, NULL); +} + +void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways) +{ + uint32_t param = (num_ways << 1) | (num_ways > 0); + + dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetCabForUclkPstate, param, NULL); + smu_print("Numways for SubVP : %d\n", num_ways); +} + +void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr) +{ + smu_print("SMU Transfer WM table DRAM 2 SMU\n"); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_TransferTableDram2Smu, TABLE_WATERMARKS, NULL); +} + +void dcn401_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr) +{ + smu_print("SMU Set PME workaround\n"); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_BacoAudioD3PME, 0, NULL); +} + +/* Returns the actual frequency that was set in MHz, 0 on failure */ +unsigned int dcn401_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz) +{ + uint32_t response = 0; + + /* bits 23:16 for clock type, lower 16 bits for frequency in MHz */ + uint32_t param = (clk << 16) | freq_mhz; + + smu_print("SMU Set hard min by freq: clk = %d, freq_mhz = %d MHz\n", clk, freq_mhz); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_SetHardMinByFreq, param, &response); + + smu_print("SMU Frequency set = %d KHz\n", response); + + return response; +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h new file mode 100644 index 000000000000..8918bc52c2af --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DCN401_CLK_MGR_SMU_MSG_H_ +#define __DCN401_CLK_MGR_SMU_MSG_H_ + +#include "os_types.h" +#include "core_types.h" +#include "dcn32/dcn32_clk_mgr_smu_msg.h" + +#define FCLK_PSTATE_NOTSUPPORTED 0x00 +#define FCLK_PSTATE_SUPPORTED 0x01 + +void dcn401_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool enable); +void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways); +void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr); +void dcn401_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr); +unsigned int dcn401_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz); + +#endif /* __DCN401_CLK_MGR_SMU_MSG_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h new file mode 100644 index 000000000000..36034b32870c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. +// +// This is a stripped-down version of the smu13_driver_if.h file for the relevant DAL interfaces. 
+ +#define SMU14_DRIVER_IF_VERSION 0x1 + +//Only Clks that have DPM descriptors are listed here +typedef enum { + PPCLK_GFXCLK = 0, + PPCLK_SOCCLK, + PPCLK_UCLK, + PPCLK_FCLK, + PPCLK_DCLK_0, + PPCLK_VCLK_0, + PPCLK_DISPCLK, + PPCLK_DPPCLK, + PPCLK_DPREFCLK, + PPCLK_DCFCLK, + PPCLK_DTBCLK, + PPCLK_COUNT, +} PPCLK_e; + +typedef struct { + uint8_t WmSetting; + uint8_t Flags; + uint8_t Padding[2]; + +} WatermarkRowGeneric_t; + +#define NUM_WM_RANGES 4 + +typedef enum { + WATERMARKS_CLOCK_RANGE = 0, + WATERMARKS_DUMMY_PSTATE, + WATERMARKS_MALL, + WATERMARKS_COUNT, +} WATERMARKS_FLAGS_e; + +typedef struct { + // Watermarks + WatermarkRowGeneric_t WatermarkRow[NUM_WM_RANGES]; +} Watermarks_t; + +typedef struct { + Watermarks_t Watermarks; + uint32_t Spare[16]; + + uint32_t MmHubPadding[8]; // SMU internal use +} WatermarksExternal_t; + +// Table types +#define TABLE_PMFW_PPTABLE 0 +#define TABLE_COMBO_PPTABLE 1 +#define TABLE_WATERMARKS 2 +#define TABLE_AVFS_PSM_DEBUG 3 +#define TABLE_PMSTATUSLOG 4 +#define TABLE_SMU_METRICS 5 +#define TABLE_DRIVER_SMU_CONFIG 6 +#define TABLE_ACTIVITY_MONITOR_COEFF 7 +#define TABLE_OVERDRIVE 8 +#define TABLE_I2C_COMMANDS 9 +#define TABLE_DRIVER_INFO 10 +#define TABLE_ECCINFO 11 +#define TABLE_COUNT 12 diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c new file mode 100644 index 000000000000..daf97688e901 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dc_spl_translate.h" +#include "spl/dc_spl_types.h" +#include "dcn20/dcn20_dpp.h" +#include "dcn32/dcn32_dpp.h" +#include "dcn401/dcn401_dpp.h" + +static struct spl_funcs dcn2_spl_funcs = { + .spl_calc_lb_num_partitions = dscl2_spl_calc_lb_num_partitions, +}; +static struct spl_funcs dcn32_spl_funcs = { + .spl_calc_lb_num_partitions = dscl32_spl_calc_lb_num_partitions, +}; +static struct spl_funcs dcn401_spl_funcs = { + .spl_calc_lb_num_partitions = dscl401_spl_calc_lb_num_partitions, +}; +static void populate_splrect_from_rect(struct spl_rect *spl_rect, const struct rect *rect) +{ + spl_rect->x = rect->x; + spl_rect->y = rect->y; + spl_rect->width = rect->width; + spl_rect->height = rect->height; +} +static void populate_rect_from_splrect(struct rect *rect, const struct spl_rect *spl_rect) +{ + rect->x = spl_rect->x; + rect->y = spl_rect->y; + rect->width = spl_rect->width; + rect->height = spl_rect->height; +} +static void populate_spltaps_from_taps(struct spl_taps *spl_scaling_quality, + const struct scaling_taps *scaling_quality) +{ + spl_scaling_quality->h_taps_c = scaling_quality->h_taps_c; + spl_scaling_quality->h_taps = scaling_quality->h_taps; + spl_scaling_quality->v_taps_c = scaling_quality->v_taps_c; + spl_scaling_quality->v_taps = scaling_quality->v_taps; +} +static void populate_taps_from_spltaps(struct scaling_taps *scaling_quality, + const struct spl_taps *spl_scaling_quality) +{ + scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c; + scaling_quality->h_taps = spl_scaling_quality->h_taps; + scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c; + scaling_quality->v_taps = spl_scaling_quality->v_taps; +} +static void populate_ratios_from_splratios(struct scaling_ratios *ratios, + const struct spl_ratios *spl_ratios) +{ + ratios->horz = spl_ratios->horz; + ratios->vert = spl_ratios->vert; + ratios->horz_c = spl_ratios->horz_c; + ratios->vert_c = spl_ratios->vert_c; +} +static void 
populate_inits_from_splinits(struct scl_inits *inits, + const struct spl_inits *spl_inits) +{ + inits->h = spl_inits->h; + inits->v = spl_inits->v; + inits->h_c = spl_inits->h_c; + inits->v_c = spl_inits->v_c; +} +/// @brief Translate SPL input parameters from pipe context +/// @param pipe_ctx +/// @param spl_in +void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_in *spl_in) +{ + const struct dc_plane_state *plane_state = pipe_ctx->plane_state; + const struct dc_stream_state *stream = pipe_ctx->stream; + // Assign the function to calculate the number of partitions in the line buffer + // This is used to determine the vtap support + switch (plane_state->ctx->dce_version) { + case DCN_VERSION_2_0: + spl_in->funcs = &dcn2_spl_funcs; + break; + case DCN_VERSION_3_2: + spl_in->funcs = &dcn32_spl_funcs; + break; + case DCN_VERSION_4_01: + spl_in->funcs = &dcn401_spl_funcs; + break; + default: + spl_in->funcs = &dcn2_spl_funcs; + } + // Make format field from spl_in point to plane_res scl_data format + spl_in->basic_in.format = (enum spl_pixel_format)pipe_ctx->plane_res.scl_data.format; + // Make view_format from basic_out point to view_format from stream + spl_in->basic_out.view_format = (enum spl_view_3d)stream->view_format; + // Populate spl input basic input clip rect from plane state clip rect + populate_splrect_from_rect(&spl_in->basic_in.clip_rect, &plane_state->clip_rect); + // Populate spl input basic out src rect from stream src rect + populate_splrect_from_rect(&spl_in->basic_out.src_rect, &stream->src); + // Populate spl input basic out dst rect from stream dst rect + populate_splrect_from_rect(&spl_in->basic_out.dst_rect, &stream->dst); + // Make spl input basic input info rotation field point to plane state rotation + spl_in->basic_in.rotation = (enum spl_rotation_angle)plane_state->rotation; + // Populate spl input basic input src rect from plane state src rect + populate_splrect_from_rect(&spl_in->basic_in.src_rect, &plane_state->src_rect); + // Populate spl input basic input dst rect from plane state dst rect + populate_splrect_from_rect(&spl_in->basic_in.dst_rect, &plane_state->dst_rect); + // Make spl input basic input info horiz mirror field point to plane state horz mirror + spl_in->basic_in.horizontal_mirror = plane_state->horizontal_mirror; + + // Calculate horizontal splits and split index + spl_in->basic_in.mpc_combine_h = resource_get_mpc_slice_count(pipe_ctx); + + if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE) + spl_in->basic_in.mpc_combine_v = 0; + else + spl_in->basic_in.mpc_combine_v = resource_get_mpc_slice_index(pipe_ctx); + + spl_in->basic_out.odm_combine_factor = resource_get_odm_slice_count(pipe_ctx); + spl_in->odm_slice_index = resource_get_odm_slice_index(pipe_ctx); + // Make spl input basic out info output_size width point to stream h active + spl_in->basic_out.output_size.width = + stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; + // Make spl input basic out info output_size height point to v active + spl_in->basic_out.output_size.height = + stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; + spl_in->basic_out.max_downscale_src_width = + pipe_ctx->stream->ctx->dc->debug.max_downscale_src_width; + spl_in->basic_out.always_scale = pipe_ctx->stream->ctx->dc->debug.always_scale; + // Make spl input basic output info alpha_en field point to plane res scl_data lb_params alpha_en + spl_in->basic_out.alpha_en = 
pipe_ctx->plane_res.scl_data.lb_params.alpha_en; + // Make spl input basic input info scaling quality field point to plane state scaling_quality + populate_spltaps_from_taps(&spl_in->scaling_quality, &plane_state->scaling_quality); + // Translate edge adaptive scaler preference + spl_in->prefer_easf = pipe_ctx->stream->ctx->dc->config.prefer_easf; + // Translate adaptive sharpening preference + spl_in->adaptive_sharpness.enable = plane_state->adaptive_sharpness_en; + if (plane_state->sharpnessX1000 == 0) { + spl_in->adaptive_sharpness.enable = false; + } else if (plane_state->sharpnessX1000 < 999) { + spl_in->adaptive_sharpness.sharpness = SHARPNESS_LOW; + } else if (plane_state->sharpnessX1000 < 1999) { + spl_in->adaptive_sharpness.sharpness = SHARPNESS_MID; + } else { // Any other value is high sharpness + spl_in->adaptive_sharpness.sharpness = SHARPNESS_HIGH; + } + // Translate linear light scaling preference + spl_in->lls_pref = plane_state->linear_light_scaling; + + /* Translate chroma subsampling offset ( cositing ) */ + if (pipe_ctx->stream->ctx->dc->debug.force_cositing) + spl_in->basic_in.cositing = pipe_ctx->stream->ctx->dc->debug.force_cositing - 1; + else + spl_in->basic_in.cositing = plane_state->cositing; +} + +/// @brief Translate SPL output parameters to pipe context +/// @param pipe_ctx +/// @param spl_out +void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out) +{ + // Make scaler data recout point to spl output field recout + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->scl_data.recout); + // Make scaler data ratios point to spl output field ratios + populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->scl_data.ratios); + // Make scaler data viewport point to spl output field viewport + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->scl_data.viewport); + // Make scaler data viewport_c point to spl output field viewport_c + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->scl_data.viewport_c); + // Make scaler data taps point to spl output field scaling taps + populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->scl_data.taps); + // Make scaler data init point to spl output field init + populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->scl_data.inits); +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h new file mode 100644 index 000000000000..c73d640c3632 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ +#ifndef __DC_SPL_TRANSLATE_H__ +#define __DC_SPL_TRANSLATE_H__ +#include "dc.h" +#include "resource.h" + +/* Map SPL input parameters to pipe context + * @pipe_ctx: pipe context + * @spl_in: spl input structure + */ +void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_in *spl_in); + +/* Map SPL output parameters to pipe context + * @pipe_ctx: pipe context + * @spl_out: spl output structure + */ +void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out); + +#endif /* __DC_SPL_TRANSLATE_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/Makefile b/drivers/gpu/drm/amd/display/dc/dcn401/Makefile new file mode 100644 index 000000000000..2e15e639194d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/Makefile @@ -0,0 +1,14 @@ +# +# Copyright © 2023 Advanced Micro Devices, Inc. All rights reserved. +# + +DCN401 += dcn401_dio_link_encoder.o +DCN401 += dcn401_dio_stream_encoder.o +DCN401 += dcn401_hubp.o +DCN401 += dcn401_mpc.o +DCN401 += dcn401_dccg.o +DCN401 += dcn401_hubbub.o + +AMD_DAL_DCN401 = $(addprefix $(AMDDALPATH)/dc/dcn401/,$(DCN401)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DCN401) diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dccg.c new file mode 100644 index 000000000000..c06bf4a38dbc --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dccg.c @@ -0,0 +1,846 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "reg_helper.h" +#include "core_types.h" +#include "dcn401_dccg.h" +#include "dcn31/dcn31_dccg.h" + +/* +#include "dmub_common.h" +#include "dmcub_reg_access_helper.h" + +#include "dmub401_common.h" +#include "dmub401_regs.h" +#include "dmub401_dccg.h" +*/ + +#define TO_DCN_DCCG(dccg)\ + container_of(dccg, struct dcn_dccg, base) + +#define REG(reg) \ + (dccg_dcn->regs->reg) + +#undef FN +#define FN(reg_name, field_name) \ + dccg_dcn->dccg_shift->field_name, dccg_dcn->dccg_mask->field_name + +#define CTX \ + dccg_dcn->base.ctx +#define DC_LOGGER \ + dccg->ctx->logger + +static void dcn401_set_dppclk_enable(struct dccg *dccg, + uint32_t dpp_inst, uint32_t enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (dpp_inst) { + case 0: + REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, enable); + break; + case 1: + REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, enable); + break; + case 2: + REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, enable); + break; + case 3: + REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, enable); + break; + default: + break; + } +} +void dccg401_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (dccg->ref_dppclk && req_dppclk) { + int ref_dppclk = dccg->ref_dppclk; + int modulo, phase; + + // phase / modulo = dpp pipe clk / dpp global clk + modulo = 0xff; // use FF at the end + phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk; + + if (phase > 0xff) { + ASSERT(false); + phase = 0xff; + } + + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, + DPPCLK0_DTO_PHASE, phase, + DPPCLK0_DTO_MODULO, modulo); + dcn401_set_dppclk_enable(dccg, dpp_inst, true); + } else { + dcn401_set_dppclk_enable(dccg, dpp_inst, false); + } + + dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; +} + +/* This function is a workaround for writing to OTG_PIXEL_RATE_DIV + * without the probability of causing a DIG FIFO error. + */ +static void dccg401_wait_for_dentist_change_done( + struct dccg *dccg) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + uint32_t dentist_dispclk_value = REG_READ(DENTIST_DISPCLK_CNTL); + + REG_WRITE(DENTIST_DISPCLK_CNTL, dentist_dispclk_value); + REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000); +} + +static void dccg401_get_pixel_rate_div( + struct dccg *dccg, + uint32_t otg_inst, + enum pixel_rate_div *tmds_div, + uint32_t *dp_dto_int) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + uint32_t val_tmds_div = PIXEL_RATE_DIV_NA; + + switch (otg_inst) { + case 0: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG0_TMDS_PIXEL_RATE_DIV, &val_tmds_div, + DPDTO0_INT, dp_dto_int); + break; + case 1: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG1_TMDS_PIXEL_RATE_DIV, &val_tmds_div, + DPDTO1_INT, dp_dto_int); + break; + case 2: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG2_TMDS_PIXEL_RATE_DIV, &val_tmds_div, + DPDTO2_INT, dp_dto_int); + break; + case 3: + REG_GET_2(OTG_PIXEL_RATE_DIV, + OTG3_TMDS_PIXEL_RATE_DIV, &val_tmds_div, + DPDTO3_INT, dp_dto_int); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + + *tmds_div = val_tmds_div == 0 ? 
PIXEL_RATE_DIV_BY_2 : PIXEL_RATE_DIV_BY_4; +} + +static void dccg401_set_pixel_rate_div( + struct dccg *dccg, + uint32_t otg_inst, + enum pixel_rate_div tmds_div, + enum pixel_rate_div unused) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + enum pixel_rate_div cur_tmds_div = PIXEL_RATE_DIV_NA; + uint32_t dp_dto_int; + uint32_t reg_val; + + // only 2 and 4 are valid on dcn401 + if (tmds_div != PIXEL_RATE_DIV_BY_2 && tmds_div != PIXEL_RATE_DIV_BY_4) { + return; + } + + dccg401_get_pixel_rate_div(dccg, otg_inst, &cur_tmds_div, &dp_dto_int); + if (tmds_div == cur_tmds_div) + return; + + // encode enum to register value + reg_val = tmds_div == PIXEL_RATE_DIV_BY_4 ? 1 : 0; + + switch (otg_inst) { + case 0: + REG_UPDATE(OTG_PIXEL_RATE_DIV, + OTG0_TMDS_PIXEL_RATE_DIV, reg_val); + + dccg401_wait_for_dentist_change_done(dccg); + break; + case 1: + REG_UPDATE(OTG_PIXEL_RATE_DIV, + OTG1_TMDS_PIXEL_RATE_DIV, reg_val); + + dccg401_wait_for_dentist_change_done(dccg); + break; + case 2: + REG_UPDATE(OTG_PIXEL_RATE_DIV, + OTG2_TMDS_PIXEL_RATE_DIV, reg_val); + + dccg401_wait_for_dentist_change_done(dccg); + break; + case 3: + REG_UPDATE(OTG_PIXEL_RATE_DIV, + OTG3_TMDS_PIXEL_RATE_DIV, reg_val); + + dccg401_wait_for_dentist_change_done(dccg); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + + +static void dccg401_set_dtbclk_p_src( + struct dccg *dccg, + enum streamclk_source src, + uint32_t otg_inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + uint32_t p_src_sel = 0; /* selects dprefclk */ + if (src == DTBCLK0) + p_src_sel = 2; /* selects dtbclk0 */ + + switch (otg_inst) { + case 0: + if (src == REFCLK) + REG_UPDATE(DTBCLK_P_CNTL, + DTBCLK_P0_EN, 0); + else + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P0_SRC_SEL, p_src_sel, + DTBCLK_P0_EN, 1); + break; + case 1: + if (src == REFCLK) + REG_UPDATE(DTBCLK_P_CNTL, + DTBCLK_P1_EN, 0); + else + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P1_SRC_SEL, p_src_sel, + DTBCLK_P1_EN, 1); + break; + case 2: + if (src == REFCLK) + REG_UPDATE(DTBCLK_P_CNTL, + DTBCLK_P2_EN, 0); + else + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P2_SRC_SEL, p_src_sel, + DTBCLK_P2_EN, 1); + break; + case 3: + if (src == REFCLK) + REG_UPDATE(DTBCLK_P_CNTL, + DTBCLK_P3_EN, 0); + else + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P3_SRC_SEL, p_src_sel, + DTBCLK_P3_EN, 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + +} + +void dccg401_set_physymclk( + struct dccg *dccg, + int phy_inst, + enum physymclk_clock_source clk_src, + bool force_enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* Force PHYSYMCLK on and Select phyd32clk as the source of clock which is output to PHY through DCIO */ + switch (phy_inst) { + case 0: + if (force_enable) { + REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, + PHYASYMCLK_EN, 1, + PHYASYMCLK_SRC_SEL, clk_src); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYASYMCLK_ROOT_GATE_DISABLE, 1); + } else { + REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, + PHYASYMCLK_EN, 0, + PHYASYMCLK_SRC_SEL, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYASYMCLK_ROOT_GATE_DISABLE, 0); + } + break; + case 1: + if (force_enable) { + REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, + PHYBSYMCLK_EN, 1, + PHYBSYMCLK_SRC_SEL, clk_src); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYBSYMCLK_ROOT_GATE_DISABLE, 1); + } else { + REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, + PHYBSYMCLK_EN, 0, + PHYBSYMCLK_SRC_SEL, 0); + 
if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYBSYMCLK_ROOT_GATE_DISABLE, 0); + } + break; + case 2: + if (force_enable) { + REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, + PHYCSYMCLK_EN, 1, + PHYCSYMCLK_SRC_SEL, clk_src); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYCSYMCLK_ROOT_GATE_DISABLE, 1); + } else { + REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, + PHYCSYMCLK_EN, 0, + PHYCSYMCLK_SRC_SEL, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYCSYMCLK_ROOT_GATE_DISABLE, 0); + } + break; + case 3: + if (force_enable) { + REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, + PHYDSYMCLK_EN, 1, + PHYDSYMCLK_SRC_SEL, clk_src); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYDSYMCLK_ROOT_GATE_DISABLE, 1); + } else { + REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, + PHYDSYMCLK_EN, 0, + PHYDSYMCLK_SRC_SEL, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYDSYMCLK_ROOT_GATE_DISABLE, 0); + } + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg401_get_dccg_ref_freq(struct dccg *dccg, + unsigned int xtalin_freq_inKhz, + unsigned int *dccg_ref_freq_inKhz) +{ + /* + * Assume refclk is sourced from xtalin + * expect 100MHz + */ + *dccg_ref_freq_inKhz = xtalin_freq_inKhz; + return; +} + +static void dccg401_otg_add_pixel(struct dccg *dccg, + uint32_t otg_inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + REG_UPDATE(OTG_PIXEL_RATE_CNTL[otg_inst], + OTG_ADD_PIXEL[otg_inst], 1); +} + +static void dccg401_otg_drop_pixel(struct dccg *dccg, + uint32_t otg_inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + REG_UPDATE(OTG_PIXEL_RATE_CNTL[otg_inst], + OTG_DROP_PIXEL[otg_inst], 1); +} + +static void dccg401_enable_symclk32_le( + struct dccg *dccg, + int hpo_le_inst, + enum phyd32clk_clock_source phyd32clk) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* select one of the PHYD32CLKs as the source for symclk32_le */ + switch (hpo_le_inst) { + case 0: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE0_GATE_DISABLE, 1, + SYMCLK32_ROOT_LE0_GATE_DISABLE, 1); + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE0_SRC_SEL, phyd32clk, + SYMCLK32_LE0_EN, 1); + break; + case 1: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE1_GATE_DISABLE, 1, + SYMCLK32_ROOT_LE1_GATE_DISABLE, 1); + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE1_SRC_SEL, phyd32clk, + SYMCLK32_LE1_EN, 1); + break; + case 2: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE2_GATE_DISABLE, 1, + SYMCLK32_ROOT_LE2_GATE_DISABLE, 1); + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE2_SRC_SEL, phyd32clk, + SYMCLK32_LE2_EN, 1); + break; + case 3: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE3_GATE_DISABLE, 1, + SYMCLK32_ROOT_LE3_GATE_DISABLE, 1); + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE3_SRC_SEL, phyd32clk, + SYMCLK32_LE3_EN, 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg401_disable_symclk32_le( + struct dccg *dccg, + int hpo_le_inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* set refclk as the source 
for symclk32_le */ + switch (hpo_le_inst) { + case 0: + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE0_SRC_SEL, 0, + SYMCLK32_LE0_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE0_GATE_DISABLE, 0, + SYMCLK32_ROOT_LE0_GATE_DISABLE, 0); + break; + case 1: + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE1_SRC_SEL, 0, + SYMCLK32_LE1_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE1_GATE_DISABLE, 0, + SYMCLK32_ROOT_LE1_GATE_DISABLE, 0); + break; + case 2: + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE2_SRC_SEL, 0, + SYMCLK32_LE2_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE2_GATE_DISABLE, 0, + SYMCLK32_ROOT_LE2_GATE_DISABLE, 0); + break; + case 3: + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE3_SRC_SEL, 0, + SYMCLK32_LE3_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE3_GATE_DISABLE, 0, + SYMCLK32_ROOT_LE3_GATE_DISABLE, 0); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg401_enable_dpstreamclk(struct dccg *dccg, int otg_inst, int dp_hpo_inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* enabled to select one of the DTBCLKs for pipe */ + switch (dp_hpo_inst) { + case 0: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK0_ROOT_GATE_DISABLE, 1, + DPSTREAMCLK0_GATE_DISABLE, 1); + REG_UPDATE_2(DPSTREAMCLK_CNTL, + DPSTREAMCLK0_SRC_SEL, otg_inst, + DPSTREAMCLK0_EN, 1); + break; + case 1: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK1_ROOT_GATE_DISABLE, 1, + DPSTREAMCLK1_GATE_DISABLE, 1); + REG_UPDATE_2(DPSTREAMCLK_CNTL, + DPSTREAMCLK1_SRC_SEL, otg_inst, + DPSTREAMCLK1_EN, 1); + break; + case 2: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK2_ROOT_GATE_DISABLE, 1, + DPSTREAMCLK2_GATE_DISABLE, 1); + REG_UPDATE_2(DPSTREAMCLK_CNTL, + DPSTREAMCLK2_SRC_SEL, otg_inst, + DPSTREAMCLK2_EN, 1); + break; + case 3: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK3_ROOT_GATE_DISABLE, 1, + DPSTREAMCLK3_GATE_DISABLE, 1); + REG_UPDATE_2(DPSTREAMCLK_CNTL, + DPSTREAMCLK3_SRC_SEL, otg_inst, + DPSTREAMCLK3_EN, 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + DPSTREAMCLK_GATE_DISABLE, 1, + DPSTREAMCLK_ROOT_GATE_DISABLE, 1); +} + +static void dccg401_disable_dpstreamclk(struct dccg *dccg, int dp_hpo_inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (dp_hpo_inst) { + case 0: + REG_UPDATE(DPSTREAMCLK_CNTL, + DPSTREAMCLK0_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK0_ROOT_GATE_DISABLE, 0, + DPSTREAMCLK0_GATE_DISABLE, 0); + break; + case 1: + REG_UPDATE(DPSTREAMCLK_CNTL, + DPSTREAMCLK1_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK1_ROOT_GATE_DISABLE, 0, + DPSTREAMCLK1_GATE_DISABLE, 0); + break; + case 2: + REG_UPDATE(DPSTREAMCLK_CNTL, + DPSTREAMCLK2_EN, 0); + if 
(dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK2_ROOT_GATE_DISABLE, 0, + DPSTREAMCLK2_GATE_DISABLE, 0); + break; + case 3: + REG_UPDATE(DPSTREAMCLK_CNTL, + DPSTREAMCLK3_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK3_ROOT_GATE_DISABLE, 0, + DPSTREAMCLK3_GATE_DISABLE, 0); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg401_set_dpstreamclk( + struct dccg *dccg, + enum streamclk_source src, + int otg_inst, + int dp_hpo_inst) +{ + /* set the dtbclk_p source */ + dccg401_set_dtbclk_p_src(dccg, src, otg_inst); + + /* enabled to select one of the DTBCLKs for pipe */ + if (src == REFCLK) + dccg401_disable_dpstreamclk(dccg, dp_hpo_inst); + else + dccg401_enable_dpstreamclk(dccg, otg_inst, dp_hpo_inst); +} + +static void dccg401_set_dp_dto( + struct dccg *dccg, + const struct dp_dto_params *params) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + bool enable = false; + + if (params->otg_inst > 3) { + /* dcn401 only has 4 instances */ + BREAK_TO_DEBUGGER(); + return; + } + + if (!dc_is_tmds_signal(params->signal)) { + uint64_t dto_integer; + uint64_t dto_phase_hz; + uint64_t dto_modulo_hz = params->refclk_hz; + + enable = true; + + /* Set DTO values: + * int = target_pix_rate / reference_clock + * phase = target_pix_rate - int * reference_clock, + * modulo = reference_clock */ + dto_integer = div_u64(params->pixclk_hz, dto_modulo_hz); + dto_phase_hz = params->pixclk_hz - dto_integer * dto_modulo_hz; + + if (dto_phase_hz <= 0) { + /* negative pixel rate should never happen */ + BREAK_TO_DEBUGGER(); + return; + } + + switch (params->otg_inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 1); + REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, 1, + SYMCLK32_ROOT_SE0_GATE_DISABLE, 1, + SYMCLK32_LE0_GATE_DISABLE, 1, + SYMCLK32_ROOT_LE0_GATE_DISABLE, 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 1); + REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, 1, + SYMCLK32_ROOT_SE1_GATE_DISABLE, 1, + SYMCLK32_LE1_GATE_DISABLE, 1, + SYMCLK32_ROOT_LE1_GATE_DISABLE, 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 1); + REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, 1, + SYMCLK32_ROOT_SE2_GATE_DISABLE, 1, + SYMCLK32_LE2_GATE_DISABLE, 1, + SYMCLK32_ROOT_LE2_GATE_DISABLE, 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 1); + REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, 1, + SYMCLK32_ROOT_SE3_GATE_DISABLE, 1, + SYMCLK32_LE3_GATE_DISABLE, 1, + SYMCLK32_ROOT_LE3_GATE_DISABLE, 1); + break; + } + + dccg401_set_dtbclk_p_src(dccg, params->clk_src, params->otg_inst); + + REG_WRITE(DP_DTO_PHASE[params->otg_inst], dto_phase_hz); + REG_WRITE(DP_DTO_MODULO[params->otg_inst], dto_modulo_hz); + + switch (params->otg_inst) { + case 0: + REG_UPDATE(OTG_PIXEL_RATE_DIV, + DPDTO0_INT, dto_integer); + break; + case 1: + REG_UPDATE(OTG_PIXEL_RATE_DIV, + DPDTO1_INT, dto_integer); + break; + case 2: + REG_UPDATE(OTG_PIXEL_RATE_DIV, + DPDTO2_INT, dto_integer); + break; + case 3: + REG_UPDATE(OTG_PIXEL_RATE_DIV, + DPDTO3_INT, dto_integer); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + } + + /* Toggle DTO */ + REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DP_DTO_ENABLE[params->otg_inst], enable, + 
PIPE_DTO_SRC_SEL[params->otg_inst], enable); +} + +void dccg401_init(struct dccg *dccg) +{ + /* Set HPO stream encoder to use refclk to avoid case where PHY is + * disabled and SYMCLK32 for HPO SE is sourced from PHYD32CLK which + * will cause DCN to hang. + */ + dccg31_disable_symclk32_se(dccg, 0); + dccg31_disable_symclk32_se(dccg, 1); + dccg31_disable_symclk32_se(dccg, 2); + dccg31_disable_symclk32_se(dccg, 3); + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) { + dccg401_disable_symclk32_le(dccg, 0); + dccg401_disable_symclk32_le(dccg, 1); + dccg401_disable_symclk32_le(dccg, 2); + dccg401_disable_symclk32_le(dccg, 3); + } + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) { + dccg401_disable_dpstreamclk(dccg, 0); + dccg401_disable_dpstreamclk(dccg, 1); + dccg401_disable_dpstreamclk(dccg, 2); + dccg401_disable_dpstreamclk(dccg, 3); + } + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) { + dccg401_set_physymclk(dccg, 0, PHYSYMCLK_FORCE_SRC_SYMCLK, false); + dccg401_set_physymclk(dccg, 1, PHYSYMCLK_FORCE_SRC_SYMCLK, false); + dccg401_set_physymclk(dccg, 2, PHYSYMCLK_FORCE_SRC_SYMCLK, false); + dccg401_set_physymclk(dccg, 3, PHYSYMCLK_FORCE_SRC_SYMCLK, false); + } +} + +static void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(DSCCLK0_DTO_PARAM, + DSCCLK0_DTO_PHASE, 1, + DSCCLK0_DTO_MODULO, 1); + REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1, DSCCLK0_DTO_DB_EN, 1); + break; + case 1: + REG_UPDATE_2(DSCCLK1_DTO_PARAM, + DSCCLK1_DTO_PHASE, 1, + DSCCLK1_DTO_MODULO, 1); + REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1, DSCCLK1_DTO_DB_EN, 1); + break; + case 2: + REG_UPDATE_2(DSCCLK2_DTO_PARAM, + DSCCLK2_DTO_PHASE, 1, + DSCCLK2_DTO_MODULO, 1); + REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1, DSCCLK2_DTO_DB_EN, 1); + break; + case 3: + REG_UPDATE_2(DSCCLK3_DTO_PARAM, + DSCCLK3_DTO_PHASE, 1, + DSCCLK3_DTO_MODULO, 1); + REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1, DSCCLK3_DTO_DB_EN, 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg401_set_ref_dscclk(struct dccg *dccg, + uint32_t dsc_inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (dsc_inst) { + case 0: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 0); + REG_UPDATE_2(DSCCLK0_DTO_PARAM, + DSCCLK0_DTO_PHASE, 0, + DSCCLK0_DTO_MODULO, 1); + break; + case 1: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 0); + REG_UPDATE_2(DSCCLK1_DTO_PARAM, + DSCCLK1_DTO_PHASE, 0, + DSCCLK1_DTO_MODULO, 1); + break; + case 2: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 0); + REG_UPDATE_2(DSCCLK2_DTO_PARAM, + DSCCLK2_DTO_PHASE, 0, + DSCCLK2_DTO_MODULO, 1); + break; + case 3: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 0); + REG_UPDATE_2(DSCCLK3_DTO_PARAM, + DSCCLK3_DTO_PHASE, 0, + DSCCLK3_DTO_MODULO, 1); + break; + default: + return; + } +} + + +static const struct dccg_funcs dccg401_funcs = { + .update_dpp_dto = dccg401_update_dpp_dto, + .get_dccg_ref_freq = dccg401_get_dccg_ref_freq, + .dccg_init = dccg401_init, + .set_dpstreamclk = dccg401_set_dpstreamclk, + .enable_symclk32_se = dccg31_enable_symclk32_se, + .disable_symclk32_se = dccg31_disable_symclk32_se, + .enable_symclk32_le = dccg401_enable_symclk32_le, + .disable_symclk32_le = dccg401_disable_symclk32_le, + .set_physymclk = dccg401_set_physymclk, + .set_dtbclk_dto = NULL, + .set_dto_dscclk = dccg401_set_dto_dscclk, + .set_ref_dscclk = dccg401_set_ref_dscclk, + .set_valid_pixel_rate = NULL, + .set_fifo_errdet_ovr_en 
= dccg2_set_fifo_errdet_ovr_en, + .set_audio_dtbclk_dto = NULL, + .otg_add_pixel = dccg401_otg_add_pixel, + .otg_drop_pixel = dccg401_otg_drop_pixel, + .set_pixel_rate_div = dccg401_set_pixel_rate_div, + .set_dp_dto = dccg401_set_dp_dto, + .set_dtbclk_p_src = dccg401_set_dtbclk_p_src, +}; + +struct dccg *dccg401_create( + struct dc_context *ctx, + const struct dccg_registers *regs, + const struct dccg_shift *dccg_shift, + const struct dccg_mask *dccg_mask) +{ + struct dcn_dccg *dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_KERNEL); + struct dccg *base; + + if (dccg_dcn == NULL) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + base = &dccg_dcn->base; + base->ctx = ctx; + base->funcs = &dccg401_funcs; + + dccg_dcn->regs = regs; + dccg_dcn->dccg_shift = dccg_shift; + dccg_dcn->dccg_mask = dccg_mask; + + return &dccg_dcn->base; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dccg.h new file mode 100644 index 000000000000..8d9e26a760a3 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dccg.h @@ -0,0 +1,205 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DCN401_DCCG_H__ +#define __DCN401_DCCG_H__ + +#include "dcn32/dcn32_dccg.h" + +#define DCCG_SFII(block, reg_name, field_prefix, field_name, inst, post_fix)\ + .field_prefix ## _ ## field_name[inst] = block ## inst ## _ ## reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix + +#define DCCG_MASK_SH_LIST_DCN401(mask_sh) \ + DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\ + DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 1, mask_sh),\ + DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 2, mask_sh),\ + DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 3, mask_sh),\ + DCCG_SF(DPPCLK_CTRL, DPPCLK0_EN, mask_sh),\ + DCCG_SF(DPPCLK_CTRL, DPPCLK1_EN, mask_sh),\ + DCCG_SF(DPPCLK_CTRL, DPPCLK2_EN, mask_sh),\ + DCCG_SF(DPPCLK_CTRL, DPPCLK3_EN, mask_sh),\ + DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\ + DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\ + DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\ + DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\ + DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, mask_sh),\ + DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_SRC_SEL, mask_sh),\ + DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, mask_sh),\ + DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_SRC_SEL, mask_sh),\ + DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, mask_sh),\ + DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_SRC_SEL, mask_sh),\ + DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, mask_sh),\ + DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_SRC_SEL, mask_sh),\ + DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, mask_sh),\ + DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, mask_sh),\ + DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, mask_sh),\ + DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, mask_sh),\ + DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_SRC_SEL, mask_sh),\ + DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_SRC_SEL, mask_sh),\ + DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_SRC_SEL, mask_sh),\ + DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_SRC_SEL, mask_sh),\ + DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_EN, mask_sh),\ + DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_SRC_SEL, mask_sh),\ + DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, mask_sh),\ + DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, mask_sh),\ + DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, mask_sh),\ + DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, mask_sh),\ + DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_EN, mask_sh),\ + DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_EN, mask_sh),\ + DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_EN, mask_sh),\ + DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_EN, mask_sh),\ + DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_SRC_SEL, mask_sh),\ + DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_SRC_SEL, mask_sh),\ + DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_EN, mask_sh),\ + DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_EN, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 0, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 1, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 2, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 3, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\ + DCCG_SF(OTG_PIXEL_RATE_DIV, OTG0_TMDS_PIXEL_RATE_DIV, mask_sh),\ + DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO0_INT, mask_sh),\ + DCCG_SF(OTG_PIXEL_RATE_DIV, OTG1_TMDS_PIXEL_RATE_DIV, 
mask_sh),\ + DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO1_INT, mask_sh),\ + DCCG_SF(OTG_PIXEL_RATE_DIV, OTG2_TMDS_PIXEL_RATE_DIV, mask_sh),\ + DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO2_INT, mask_sh),\ + DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_TMDS_PIXEL_RATE_DIV, mask_sh),\ + DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO3_INT, mask_sh),\ + DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_SRC_SEL, mask_sh),\ + DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_EN, mask_sh),\ + DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_SRC_SEL, mask_sh),\ + DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_EN, mask_sh),\ + DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_SRC_SEL, mask_sh),\ + DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_EN, mask_sh),\ + DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_SRC_SEL, mask_sh),\ + DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_EN, mask_sh),\ + DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh),\ + DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\ + DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 0, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 1, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 2, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 3, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 0, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 1, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 2, mask_sh),\ + DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 3, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_EN, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_EN, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_EN, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_EN, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_DTO_DB_EN, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_DTO_DB_EN, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_DTO_DB_EN, mask_sh),\ + DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_DTO_DB_EN, mask_sh),\ + DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\ + DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\ + DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\ + DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_MODULO, mask_sh),\ + DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, mask_sh),\ + DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_MODULO, mask_sh),\ + DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, mask_sh),\ + DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_MODULO, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, HDMISTREAMCLK0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, 
SYMCLK32_SE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, HDMICHARCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYA_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYB_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYC_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, HDMISTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\ + +void dccg401_init(struct dccg *dccg); + +void dccg401_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk); + +void dccg401_set_src_sel( + struct dccg *dccg, + const struct dtbclk_dto_params *params); + +struct dccg *dccg401_create( + struct dc_context *ctx, + const struct dccg_registers *regs, + const struct dccg_shift *dccg_shift, + const struct dccg_mask *dccg_mask); + +void dccg401_set_physymclk( + struct dccg *dccg, + int phy_inst, + enum physymclk_clock_source clk_src, + bool force_enable); + +#endif //__DCN401_DCCG_H__ 
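Editor's note on the DTO arithmetic in the dccg401 code above: dccg401_update_dpp_dto() programs the DPP DTO so that phase/modulo approximates req_dppclk/ref_dppclk with the modulo fixed at 0xFF and the phase rounded up and clamped, while dccg401_set_dp_dto() splits the pixel clock against the DTO reference clock into an integer part plus a phase/modulo remainder (int = pixclk / refclk, phase = pixclk - int * refclk, modulo = refclk). The standalone C sketch below mirrors that arithmetic outside the driver for illustration only; the helper names compute_dpp_dto() and compute_dp_dto() and the example clock values are hypothetical and do not appear in these sources.

/* Userspace sketch of the DCCG DTO arithmetic used above (illustrative only). */
#include <stdint.h>
#include <stdio.h>

/* DPP DTO: phase/modulo ~= req_dppclk/ref_dppclk, modulo fixed at 0xFF,
 * phase rounded up and clamped, as in dccg401_update_dpp_dto(). */
static void compute_dpp_dto(int req_dppclk, int ref_dppclk, int *phase, int *modulo)
{
	*modulo = 0xff;
	*phase = ((*modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk;
	if (*phase > 0xff)
		*phase = 0xff;
}

/* DP DTO: int = pixclk / refclk, phase = remainder, modulo = refclk,
 * matching the comment block in dccg401_set_dp_dto(). */
static void compute_dp_dto(uint64_t pixclk_hz, uint64_t refclk_hz,
			   uint64_t *dto_int, uint64_t *phase, uint64_t *modulo)
{
	*modulo = refclk_hz;
	*dto_int = pixclk_hz / refclk_hz;
	*phase = pixclk_hz - *dto_int * refclk_hz;
}

int main(void)
{
	int p, m;
	uint64_t di, ph, mo;

	/* Example: 600 MHz DPP pipe clock against an 800 MHz global DPP clock. */
	compute_dpp_dto(600000, 800000, &p, &m);
	printf("DPP DTO: phase=%d modulo=%d\n", p, m);

	/* Example: 594 MHz pixel clock against a 100 MHz DTO reference clock. */
	compute_dp_dto(594000000ULL, 100000000ULL, &di, &ph, &mo);
	printf("DP DTO: int=%llu phase=%llu modulo=%llu\n",
	       (unsigned long long)di, (unsigned long long)ph, (unsigned long long)mo);
	return 0;
}

With the 600 MHz / 800 MHz example, the sketch yields phase 192 against modulo 255, i.e. roughly the 3/4 ratio the divider is asked to realize; the 594 MHz / 100 MHz example yields int 5, phase 94000000, modulo 100000000, the values the driver would write into DPDTOx_INT and the DP_DTO_PHASE/DP_DTO_MODULO registers.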
diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_link_encoder.c new file mode 100644 index 000000000000..7e558ca195ef --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_link_encoder.c @@ -0,0 +1,322 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + + +#include "reg_helper.h" + +#include "core_types.h" +#include "link_encoder.h" +#include "dcn31/dcn31_dio_link_encoder.h" +#include "dcn32/dcn32_dio_link_encoder.h" +#include "dcn401_dio_link_encoder.h" +#include "stream_encoder.h" +#include "dc_bios_types.h" + +#include "gpio_service_interface.h" + +#ifndef MIN +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#endif + +#define CTX \ + enc10->base.ctx +#define DC_LOGGER \ + enc10->base.ctx->logger + +#define REG(reg)\ + (enc10->link_regs->reg) + +#undef FN +#define FN(reg_name, field_name) \ + enc10->link_shift->field_name, enc10->link_mask->field_name + +#define AUX_REG(reg)\ + (enc10->aux_regs->reg) + +#define AUX_REG_READ(reg_name) \ + dm_read_reg(CTX, AUX_REG(reg_name)) + +#define AUX_REG_WRITE(reg_name, val) \ + dm_write_reg(CTX, AUX_REG(reg_name), val) + +#ifndef MIN +#define MIN(X, Y) ((X) < (Y) ? 
(X) : (Y)) +#endif + +void enc401_hw_init(struct link_encoder *enc) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + +/* + 00 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__1to2 : 1/2 + 01 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__3to4 : 3/4 + 02 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__7to8 : 7/8 + 03 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__15to16 : 15/16 + 04 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__31to32 : 31/32 + 05 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__63to64 : 63/64 + 06 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__127to128 : 127/128 + 07 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__255to256 : 255/256 +*/ + +/* + AUX_REG_UPDATE_5(AUX_DPHY_RX_CONTROL0, + AUX_RX_START_WINDOW = 1 [6:4] + AUX_RX_RECEIVE_WINDOW = 1 default is 2 [10:8] + AUX_RX_HALF_SYM_DETECT_LEN = 1 [13:12] default is 1 + AUX_RX_TRANSITION_FILTER_EN = 1 [16] default is 1 + AUX_RX_ALLOW_BELOW_THRESHOLD_PHASE_DETECT [17] is 0 default is 0 + AUX_RX_ALLOW_BELOW_THRESHOLD_START [18] is 1 default is 1 + AUX_RX_ALLOW_BELOW_THRESHOLD_STOP [19] is 1 default is 1 + AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3 + AUX_RX_DETECTION_THRESHOLD [30:28] = 1 +*/ + AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110); + + AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a); + + //AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32; + // Set AUX_TX_REF_DIV Divider to generate 2 MHz reference from refclk + // 27MHz -> 0xd + // 100MHz -> 0x32 + // 48MHz -> 0x18 + + // Set TMDS_CTL0 to 1. This is a legacy setting. + REG_UPDATE(TMDS_CTL_BITS, TMDS_CTL0, 1); + + dcn10_aux_initialize(enc10); +} + + +void dcn401_link_encoder_enable_dp_output( + struct link_encoder *enc, + const struct dc_link_settings *link_settings, + enum clock_source_id clock_source) +{ + if (!enc->ctx->dc->debug.avoid_vbios_exec_table) { + dcn10_link_encoder_enable_dp_output(enc, link_settings, clock_source); + return; + } +} + +void dcn401_link_encoder_setup( + struct link_encoder *enc, + enum signal_type signal) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + + switch (signal) { + case SIGNAL_TYPE_EDP: + case SIGNAL_TYPE_DISPLAY_PORT: + /* DP SST */ + REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_MODE, 0); + break; + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + /* TMDS-DVI */ + REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_MODE, 2); + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + /* TMDS-HDMI */ + REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_MODE, 3); + break; + case SIGNAL_TYPE_DISPLAY_PORT_MST: + /* DP MST */ + REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_MODE, 5); + break; + default: + ASSERT_CRITICAL(false); + /* invalid mode ! 
*/ + break; + } + REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, 1); + REG_UPDATE(DIG_BE_EN_CNTL, DIG_BE_ENABLE, 1); +} + +bool dcn401_is_dig_enabled(struct link_encoder *enc) +{ + uint32_t clk_enabled; + uint32_t dig_enabled; + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + + REG_GET(DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, &clk_enabled); + REG_GET(DIG_BE_EN_CNTL, DIG_BE_ENABLE, &dig_enabled); + return (clk_enabled == 1 && dig_enabled == 1); +} + +enum signal_type dcn401_get_dig_mode( + struct link_encoder *enc) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + uint32_t value; + REG_GET(DIG_BE_CLK_CNTL, DIG_BE_MODE, &value); + switch (value) { + case 0: + return SIGNAL_TYPE_DISPLAY_PORT; + case 2: + return SIGNAL_TYPE_DVI_SINGLE_LINK; + case 3: + return SIGNAL_TYPE_HDMI_TYPE_A; + case 5: + return SIGNAL_TYPE_DISPLAY_PORT_MST; + default: + return SIGNAL_TYPE_NONE; + } +} + +static const struct link_encoder_funcs dcn401_link_enc_funcs = { + .read_state = link_enc2_read_state, + .validate_output_with_stream = + dcn30_link_encoder_validate_output_with_stream, + .hw_init = enc401_hw_init, + .setup = dcn401_link_encoder_setup, + .enable_tmds_output = dcn10_link_encoder_enable_tmds_output, + .enable_dp_output = dcn401_link_encoder_enable_dp_output, + .enable_dp_mst_output = dcn10_link_encoder_enable_dp_mst_output, + .disable_output = dcn10_link_encoder_disable_output, + .dp_set_lane_settings = dcn10_link_encoder_dp_set_lane_settings, + .dp_set_phy_pattern = dcn10_link_encoder_dp_set_phy_pattern, + .update_mst_stream_allocation_table = + dcn10_link_encoder_update_mst_stream_allocation_table, + .psr_program_dp_dphy_fast_training = + dcn10_psr_program_dp_dphy_fast_training, + .psr_program_secondary_packet = dcn10_psr_program_secondary_packet, + .connect_dig_be_to_fe = dcn10_link_encoder_connect_dig_be_to_fe, + .enable_hpd = dcn10_link_encoder_enable_hpd, + .disable_hpd = dcn10_link_encoder_disable_hpd, + .is_dig_enabled = dcn401_is_dig_enabled, + .destroy = dcn10_link_encoder_destroy, + .fec_set_enable = enc2_fec_set_enable, + .fec_set_ready = enc2_fec_set_ready, + .fec_is_active = enc2_fec_is_active, + .get_dig_frontend = dcn10_get_dig_frontend, + .get_dig_mode = dcn401_get_dig_mode, + .is_in_alt_mode = dcn32_link_encoder_is_in_alt_mode, + .get_max_link_cap = dcn32_link_encoder_get_max_link_cap, + .set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux, +}; + +void dcn401_link_encoder_construct( + struct dcn20_link_encoder *enc20, + const struct encoder_init_data *init_data, + const struct encoder_feature_support *enc_features, + const struct dcn10_link_enc_registers *link_regs, + const struct dcn10_link_enc_aux_registers *aux_regs, + const struct dcn10_link_enc_hpd_registers *hpd_regs, + const struct dcn10_link_enc_shift *link_shift, + const struct dcn10_link_enc_mask *link_mask) +{ + struct bp_connector_speed_cap_info bp_cap_info = {0}; + const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs; + enum bp_result result = BP_RESULT_OK; + struct dcn10_link_encoder *enc10 = &enc20->enc10; + + enc10->base.funcs = &dcn401_link_enc_funcs; + enc10->base.ctx = init_data->ctx; + enc10->base.id = init_data->encoder; + + enc10->base.hpd_source = init_data->hpd_source; + enc10->base.connector = init_data->connector; + + + enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; + + enc10->base.features = *enc_features; + if (enc10->base.connector.id == CONNECTOR_ID_USBC) + enc10->base.features.flags.bits.DP_IS_USB_C = 1; + + enc10->base.transmitter = init_data->transmitter; + + /* set 
the flag to indicate whether driver poll the I2C data pin + * while doing the DP sink detect + */ + +/* if (dal_adapter_service_is_feature_supported(as, + FEATURE_DP_SINK_DETECT_POLL_DATA_PIN)) + enc10->base.features.flags.bits. + DP_SINK_DETECT_POLL_DATA_PIN = true;*/ + + enc10->base.output_signals = + SIGNAL_TYPE_DVI_SINGLE_LINK | + SIGNAL_TYPE_DVI_DUAL_LINK | + SIGNAL_TYPE_LVDS | + SIGNAL_TYPE_DISPLAY_PORT | + SIGNAL_TYPE_DISPLAY_PORT_MST | + SIGNAL_TYPE_EDP | + SIGNAL_TYPE_HDMI_TYPE_A; + + enc10->link_regs = link_regs; + enc10->aux_regs = aux_regs; + enc10->hpd_regs = hpd_regs; + enc10->link_shift = link_shift; + enc10->link_mask = link_mask; + + switch (enc10->base.transmitter) { + case TRANSMITTER_UNIPHY_A: + enc10->base.preferred_engine = ENGINE_ID_DIGA; + break; + case TRANSMITTER_UNIPHY_B: + enc10->base.preferred_engine = ENGINE_ID_DIGB; + break; + case TRANSMITTER_UNIPHY_C: + enc10->base.preferred_engine = ENGINE_ID_DIGC; + break; + case TRANSMITTER_UNIPHY_D: + enc10->base.preferred_engine = ENGINE_ID_DIGD; + break; + case TRANSMITTER_UNIPHY_E: + enc10->base.preferred_engine = ENGINE_ID_DIGE; + break; + default: + ASSERT_CRITICAL(false); + enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; + } + + /* default to one to mirror Windows behavior */ + enc10->base.features.flags.bits.HDMI_6GB_EN = 1; + + if (bp_funcs->get_connector_speed_cap_info) + result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios, + enc10->base.connector, &bp_cap_info); + + /* Override features with DCE-specific values */ + if (result == BP_RESULT_OK) { + enc10->base.features.flags.bits.IS_HBR2_CAPABLE = + bp_cap_info.DP_HBR2_EN; + enc10->base.features.flags.bits.IS_HBR3_CAPABLE = + bp_cap_info.DP_HBR3_EN; + enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; + enc10->base.features.flags.bits.IS_DP2_CAPABLE = 1; + enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = bp_cap_info.DP_UHBR10_EN; + enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = bp_cap_info.DP_UHBR13_5_EN; + enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = bp_cap_info.DP_UHBR20_EN; + } else { + DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", + __func__, + result); + } + if (enc10->base.ctx->dc->debug.hdmi20_disable) { + enc10->base.features.flags.bits.HDMI_6GB_EN = 0; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_link_encoder.h new file mode 100644 index 000000000000..6baab8302b81 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_link_encoder.h @@ -0,0 +1,134 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_LINK_ENCODER__DCN401_H__ +#define __DC_LINK_ENCODER__DCN401_H__ + +#include "dcn30/dcn30_dio_link_encoder.h" + +#define LINK_ENCODER_MASK_SH_LIST_DCN401(mask_sh) \ + LE_SF(DIG0_DIG_BE_EN_CNTL, DIG_BE_ENABLE, mask_sh),\ + LE_SF(DIG0_DIG_BE_CNTL, DIG_RB_SWITCH_EN, mask_sh),\ + LE_SF(DIG0_DIG_BE_CNTL, DIG_HPD_SELECT, mask_sh),\ + LE_SF(DIG0_DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, mask_sh),\ + LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_MODE, mask_sh),\ + LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, mask_sh),\ + LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SOFT_RESET, mask_sh),\ + LE_SF(DIG0_DIG_BE_CLK_CNTL, HDCP_SOFT_RESET, mask_sh),\ + LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_CLOCK_ON, mask_sh),\ + LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_HDCP_CLOCK_ON, mask_sh),\ + LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_TMDS_CLOCK_ON, mask_sh),\ + LE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\ + LE_SF(DIG0_TMDS_CTL_BITS, TMDS_CTL0, mask_sh), \ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_BYPASS, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE0, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE1, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE2, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE3, mask_sh),\ + LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_EN, mask_sh),\ + LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_SEL, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM1, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM2, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM3, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM4, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM5, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM6, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM7, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM8, mask_sh),\ + LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, mask_sh),\ + LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_ADVANCE, mask_sh),\ + LE_SF(DP0_DP_DPHY_FAST_TRAINING, DPHY_RX_FAST_TRAINING_CAPABLE, mask_sh),\ + LE_SF(DP0_DP_DPHY_BS_SR_SWAP_CNTL, DPHY_LOAD_BS_COUNT, mask_sh),\ + LE_SF(DP0_DP_DPHY_TRAINING_PATTERN_SEL, DPHY_TRAINING_PATTERN_SEL, mask_sh),\ + LE_SF(DP0_DP_DPHY_HBR2_PATTERN_CONTROL, DP_DPHY_HBR2_PATTERN_CONTROL, mask_sh),\ + LE_SF(DP0_DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, mask_sh),\ + LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_IDLE_BS_INTERVAL, mask_sh),\ + LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VBID_DISABLE, mask_sh),\ + LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VID_ENHANCED_FRAME_MODE, mask_sh),\ + LE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\ + LE_SF(DP0_DP_CONFIG, DP_UDI_LANES, mask_sh),\ + LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_LINE_NUM, mask_sh),\ + LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_PRIORITY, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC0, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC1, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT0, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT1, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC2, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC3, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT2, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT3, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_SAT_UPDATE, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_16_MTP_KEEPOUT, 
mask_sh),\ + LE_SF(DP_AUX0_AUX_CONTROL, AUX_HPD_SEL, mask_sh),\ + LE_SF(DP_AUX0_AUX_CONTROL, AUX_LS_READ_EN, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_RECEIVE_WINDOW, mask_sh),\ + LE_SF(HPD0_DC_HPD_CONTROL, DC_HPD_EN, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_EN, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_READY_SHADOW, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_ACTIVE_STATUS, mask_sh),\ + LE_SF(DIG0_TMDS_CTL_BITS, TMDS_CTL0, mask_sh), \ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_START_WINDOW, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_HALF_SYM_DETECT_LEN, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_TRANSITION_FILTER_EN, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_PHASE_DETECT, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_START, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_STOP, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_PHASE_DETECT_LEN, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_DETECTION_THRESHOLD, mask_sh), \ + LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_LEN, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_SYMBOLS, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_MODE_DET_CHECK_DELAY, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_PRECHARGE_SKIP, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN_MUL, mask_sh) + +void dcn401_link_encoder_construct( + struct dcn20_link_encoder *enc20, + const struct encoder_init_data *init_data, + const struct encoder_feature_support *enc_features, + const struct dcn10_link_enc_registers *link_regs, + const struct dcn10_link_enc_aux_registers *aux_regs, + const struct dcn10_link_enc_hpd_registers *hpd_regs, + const struct dcn10_link_enc_shift *link_shift, + const struct dcn10_link_enc_mask *link_mask); + +void enc401_hw_init(struct link_encoder *enc); + +void dcn401_link_encoder_enable_dp_output( + struct link_encoder *enc, + const struct dc_link_settings *link_settings, + enum clock_source_id clock_source); + +void dcn401_link_encoder_setup( + struct link_encoder *enc, + enum signal_type signal); + +enum signal_type dcn401_get_dig_mode( + struct link_encoder *enc); + +bool dcn401_is_dig_enabled(struct link_encoder *enc); + +enum signal_type dcn401_get_dig_mode(struct link_encoder *enc); +#endif /* __DC_LINK_ENCODER__DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_stream_encoder.c new file mode 100644 index 000000000000..be0ebb6a8a55 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_stream_encoder.c @@ -0,0 +1,895 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + + +#include "dc_bios_types.h" +#include "dcn30/dcn30_dio_stream_encoder.h" +#include "dcn32/dcn32_dio_stream_encoder.h" + +#include "dcn401_dio_stream_encoder.h" +#include "reg_helper.h" +#include "hw_shared.h" +#include "link.h" +#include "dpcd_defs.h" + +#define DC_LOGGER \ + enc1->base.ctx->logger + +#define REG(reg)\ + (enc1->regs->reg) + +#undef FN +#define FN(reg_name, field_name) \ + enc1->se_shift->field_name, enc1->se_mask->field_name + +#define VBI_LINE_0 0 +#define HDMI_CLOCK_CHANNEL_RATE_MORE_340M 340000 + +#define CTX \ + enc1->base.ctx + + + +static void enc401_dp_set_odm_combine( + struct stream_encoder *enc, + bool odm_combine) +{ +} + +/* setup stream encoder in dvi mode */ +static void enc401_stream_encoder_dvi_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + bool is_dual_link) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + if (!enc->ctx->dc->debug.avoid_vbios_exec_table) { + struct bp_encoder_control cntl = {0}; + + cntl.action = ENCODER_CONTROL_SETUP; + cntl.engine_id = enc1->base.id; + cntl.signal = is_dual_link ? + SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK; + cntl.enable_dp_audio = false; + cntl.pixel_clock = crtc_timing->pix_clk_100hz / 10; + cntl.lanes_number = (is_dual_link) ? 
LANE_COUNT_EIGHT : LANE_COUNT_FOUR; + + if (enc1->base.bp->funcs->encoder_control( + enc1->base.bp, &cntl) != BP_RESULT_OK) + return; + + } else { + + //Set pattern for clock channel, default vlue 0x63 does not work + REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F); + + //DIG_BE_TMDS_DVI_MODE : TMDS-DVI mode is already set in link_encoder_setup + + //DIG_SOURCE_SELECT is already set in dig_connect_to_otg + + /* DIG_START is removed from the register spec */ + } + + ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB); + ASSERT(crtc_timing->display_color_depth == COLOR_DEPTH_888); + enc401_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing); +} + +/* setup stream encoder in hdmi mode */ +static void enc401_stream_encoder_hdmi_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + int actual_pix_clk_khz, + bool enable_audio) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + if (!enc->ctx->dc->debug.avoid_vbios_exec_table) { + struct bp_encoder_control cntl = {0}; + + cntl.action = ENCODER_CONTROL_SETUP; + cntl.engine_id = enc1->base.id; + cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A; + cntl.enable_dp_audio = enable_audio; + cntl.pixel_clock = actual_pix_clk_khz; + cntl.lanes_number = LANE_COUNT_FOUR; + + if (enc1->base.bp->funcs->encoder_control( + enc1->base.bp, &cntl) != BP_RESULT_OK) + return; + + } else { + + //Set pattern for clock channel, default vlue 0x63 does not work + REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F); + + //DIG_BE_TMDS_HDMI_MODE : TMDS-HDMI mode is already set in link_encoder_setup + + //DIG_SOURCE_SELECT is already set in dig_connect_to_otg + + /* DIG_START is removed from the register spec */ + } + + /* Configure pixel encoding */ + enc401_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing); + + /* setup HDMI engine */ + REG_UPDATE_6(HDMI_CONTROL, + HDMI_PACKET_GEN_VERSION, 1, + HDMI_KEEPOUT_MODE, 1, + HDMI_DEEP_COLOR_ENABLE, 0, + HDMI_DATA_SCRAMBLE_EN, 0, + HDMI_NO_EXTRA_NULL_PACKET_FILLED, 1, + HDMI_CLOCK_CHANNEL_RATE, 0); + + /* Configure color depth */ + switch (crtc_timing->display_color_depth) { + case COLOR_DEPTH_888: + REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0); + break; + case COLOR_DEPTH_101010: + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 1, + HDMI_DEEP_COLOR_ENABLE, 0); + } else { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 1, + HDMI_DEEP_COLOR_ENABLE, 1); + } + break; + case COLOR_DEPTH_121212: + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 2, + HDMI_DEEP_COLOR_ENABLE, 0); + } else { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 2, + HDMI_DEEP_COLOR_ENABLE, 1); + } + break; + case COLOR_DEPTH_161616: + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 3, + HDMI_DEEP_COLOR_ENABLE, 1); + break; + default: + break; + } + + if (actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M) { + /* enable HDMI data scrambler + * HDMI_CLOCK_CHANNEL_RATE_MORE_340M + * Clock channel frequency is 1/4 of character rate. 
+ */ + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DATA_SCRAMBLE_EN, 1, + HDMI_CLOCK_CHANNEL_RATE, 1); + } else if (crtc_timing->flags.LTE_340MCSC_SCRAMBLE) { + + /* TODO: New feature for DCE11, still need to implement */ + + /* enable HDMI data scrambler + * HDMI_CLOCK_CHANNEL_FREQ_EQUAL_TO_CHAR_RATE + * Clock channel frequency is the same + * as character rate + */ + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DATA_SCRAMBLE_EN, 1, + HDMI_CLOCK_CHANNEL_RATE, 0); + } + + + /* Enable transmission of General Control packet on every frame */ + REG_UPDATE_3(HDMI_VBI_PACKET_CONTROL, + HDMI_GC_CONT, 1, + HDMI_GC_SEND, 1, + HDMI_NULL_SEND, 1); + + /* Disable Audio Content Protection packet transmission */ + REG_UPDATE(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, 0); + /* following belongs to audio */ + /* Enable Audio InfoFrame packet transmission. */ + REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1); + + /* update double-buffered AUDIO_INFO registers immediately */ + ASSERT(enc->afmt); + enc->afmt->funcs->audio_info_immediate_update(enc->afmt); + + /* Select line number on which to send Audio InfoFrame packets */ + REG_UPDATE(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, + VBI_LINE_0 + 2); + + /* set HDMI GC AVMUTE */ + REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0); +} + + + +static bool is_two_pixels_per_containter(const struct dc_crtc_timing *timing) +{ + bool two_pix = timing->pixel_encoding == PIXEL_ENCODING_YCBCR420; + + two_pix = two_pix || (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422 + && !timing->dsc_cfg.ycbcr422_simple); + return two_pix; +} + +static bool is_h_timing_divisible_by_2(const struct dc_crtc_timing *timing) +{ + /* math borrowed from function of same name in inc/resource + * checks if h_timing is divisible by 2 + */ + + bool divisible = false; + uint16_t h_blank_start = 0; + uint16_t h_blank_end = 0; + + if (timing) { + h_blank_start = timing->h_total - timing->h_front_porch; + h_blank_end = h_blank_start - timing->h_addressable; + + /* HTOTAL, Hblank start/end, and Hsync start/end all must be + * divisible by 2 in order for the horizontal timing params + * to be considered divisible by 2. Hsync start is always 0. 
+ */ + divisible = (timing->h_total % 2 == 0) && + (h_blank_start % 2 == 0) && + (h_blank_end % 2 == 0) && + (timing->h_sync_width % 2 == 0); + } + return divisible; +} + +static bool is_dp_dig_pixel_rate_div_policy(struct dc *dc, const struct dc_crtc_timing *timing) +{ + /* should be functionally the same as dcn32_is_dp_dig_pixel_rate_div_policy for DP encoders*/ + return is_h_timing_divisible_by_2(timing) && + dc->debug.enable_dp_dig_pixel_rate_div_policy; +} + +static void enc401_stream_encoder_dp_unblank( + struct dc_link *link, + struct stream_encoder *enc, + const struct encoder_unblank_param *param) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + struct dc *dc = enc->ctx->dc; + + if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) { + uint32_t n_vid = 0x8000; + uint32_t m_vid; + uint32_t n_multiply = 0; + // TODO: Fix defined but not used + //uint32_t pix_per_cycle = 0; + uint64_t m_vid_l = n_vid; + + /* YCbCr 4:2:0 : Computed VID_M will be 2X the input rate */ + if (is_two_pixels_per_containter(¶m->timing) || param->opp_cnt > 1 + || is_dp_dig_pixel_rate_div_policy(dc, ¶m->timing)) { + /*this logic should be the same in get_pixel_clock_parameters() */ + n_multiply = 1; + // TODO: Fix defined but not used + //pix_per_cycle = 1; + } + /* M / N = Fstream / Flink + * m_vid / n_vid = pixel rate / link rate + */ + + m_vid_l *= param->timing.pix_clk_100hz / 10; + m_vid_l = div_u64(m_vid_l, + param->link_settings.link_rate + * LINK_RATE_REF_FREQ_IN_KHZ); + + m_vid = (uint32_t) m_vid_l; + + /* enable auto measurement */ + + REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 0); + + /* auto measurement need 1 full 0x8000 symbol cycle to kick in, + * therefore program initial value for Mvid and Nvid + */ + + REG_UPDATE(DP_VID_N, DP_VID_N, n_vid); + + REG_UPDATE(DP_VID_M, DP_VID_M, m_vid); + + REG_UPDATE_2(DP_VID_TIMING, + DP_VID_M_N_GEN_EN, 1, + DP_VID_N_INTERVAL, n_multiply); + } + + /* make sure stream is disabled before resetting steer fifo */ + REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, false); + REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, 5000); + + /* DIG_START is removed from the register spec */ + + /* switch DP encoder to CRTC data, but reset it the fifo first. It may happen + * that it overflows during mode transition, and sometimes doesn't recover. + */ + REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1); + udelay(10); + + REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0); + + REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_ENABLE, 1); + + REG_UPDATE_2(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 1, DP_VID_STREAM_DIS_DEFER, 2); + udelay(200); + + /* DIG Resync FIFO now needs to be explicitly enabled + */ + /* read start level = 0 will bring underflow / overflow and DIG_FIFO_ERROR = 1 + * so set it to 1/2 full = 7 before reset as suggested by hardware team. + */ + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7); + + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1); + + REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000); + + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0); + + REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000); + + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1); + + /* wait 100us for DIG/DP logic to prime + * (i.e. a few video lines) + */ + udelay(100); + + /* the hardware would start sending video at the start of the next DP + * frame (i.e. rising edge of the vblank). + * NOTE: We used to program DP_VID_STREAM_DIS_DEFER = 2 here, but this + * register has no effect on enable transition! 
HW always guarantees + * VID_STREAM enable at start of next frame, and this is not + * programmable + */ + + REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true); + + link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM); +} + +/* Set DSC-related configuration. + * dsc_mode: 0 disables DSC, other values enable DSC in specified format + * sc_bytes_per_pixel: DP_DSC_BYTES_PER_PIXEL removed in DCN3x + * dsc_slice_width: DP_DSC_SLICE_WIDTH removed in DCN3x + */ +static void enc401_dp_set_dsc_config(struct stream_encoder *enc, + enum optc_dsc_mode dsc_mode, + uint32_t dsc_bytes_per_pixel, + uint32_t dsc_slice_width) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + REG_UPDATE(DP_DSC_CNTL, DP_DSC_MODE, dsc_mode == OPTC_DSC_DISABLED ? 0 : 1); +} + +/* this function read dsc related register fields to be logged later in dcn10_log_hw_state + * into a dcn_dsc_state struct. + */ +static void enc401_read_state(struct stream_encoder *enc, struct enc_state *s) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + //if dsc is enabled, continue to read + REG_GET(DP_DSC_CNTL, DP_DSC_MODE, &s->dsc_mode); + if (s->dsc_mode) { + REG_GET(DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, &s->sec_gsp_pps_line_num); + + REG_GET(DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, &s->vbid6_line_reference); + REG_GET(DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, &s->vbid6_line_num); + + REG_GET(DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, &s->sec_gsp_pps_enable); + REG_GET(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, &s->sec_stream_enable); + } +} + +static void enc401_set_dig_input_mode(struct stream_encoder *enc, unsigned int pix_per_container) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + // The naming of this field is confusing, what it means is the output mode of otg, which + // is the input mode of the dig + switch (pix_per_container) { + case 2: + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, 0x1); + break; + case 4: + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, 0x2); + break; + case 8: + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, 0x3); + break; + default: + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, 0x0); + break; + } +} +static void enc401_stream_encoder_enable( + struct stream_encoder *enc, + enum signal_type signal, + bool enable) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + if (enable) { + switch (signal) { + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + /* TMDS-DVI */ + REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 2); + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + /* TMDS-HDMI */ + REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 3); + break; + case SIGNAL_TYPE_DISPLAY_PORT_MST: + /* DP MST */ + REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 5); + break; + case SIGNAL_TYPE_EDP: + case SIGNAL_TYPE_DISPLAY_PORT: + /* DP SST */ + REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 0); + break; + default: + /* invalid mode ! 
*/ + ASSERT_CRITICAL(false); + } + + REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1); + REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1); + } else { + REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0); + REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0); + } +} + +void enc401_stream_encoder_dp_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + enum dc_color_space output_color_space, + bool use_vsc_sdp_for_colorimetry, + uint32_t enable_sdp_splitting) +{ + uint32_t h_active_start; + uint32_t v_active_start; + uint32_t misc0 = 0; + uint32_t misc1 = 0; + uint32_t h_blank; + uint32_t h_back_porch; + uint8_t synchronous_clock = 0; /* asynchronous mode */ + uint8_t colorimetry_bpc; + uint8_t dp_pixel_encoding = 0; + uint8_t dp_component_depth = 0; + uint8_t dp_translate_pixel_enc = 0; + // Fix set but not used warnings + //uint8_t dp_pixel_encoding_type = 0; + uint8_t dp_compressed_pixel_format = 0; + + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + struct dc_crtc_timing hw_crtc_timing = *crtc_timing; + + if (hw_crtc_timing.flags.INTERLACE) { + /*the input timing is in VESA spec format with Interlace flag =1*/ + hw_crtc_timing.v_total /= 2; + hw_crtc_timing.v_border_top /= 2; + hw_crtc_timing.v_addressable /= 2; + hw_crtc_timing.v_border_bottom /= 2; + hw_crtc_timing.v_front_porch /= 2; + hw_crtc_timing.v_sync_width /= 2; + } + + + /* set pixel encoding */ + switch (hw_crtc_timing.pixel_encoding) { + case PIXEL_ENCODING_YCBCR422: + dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR422; + break; + case PIXEL_ENCODING_YCBCR444: + dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR444; + + if (hw_crtc_timing.flags.Y_ONLY) + if (hw_crtc_timing.display_color_depth != COLOR_DEPTH_666) + /* HW testing only, no use case yet. + * Color depth of Y-only could be + * 8, 10, 12, 16 bits + */ + dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_Y_ONLY; + + /* Note: DP_MSA_MISC1 bit 7 is the indicator + * of Y-only mode. + * This bit is set in HW if register + * DP_PIXEL_ENCODING is programmed to 0x4 + */ + break; + case PIXEL_ENCODING_YCBCR420: + dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR420; + break; + default: + dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_RGB444; + break; + } + + misc1 = REG_READ(DP_MSA_MISC); + /* For YCbCr420 and BT2020 Colorimetry Formats, VSC SDP shall be used. + * When MISC1, bit 6, is Set to 1, a Source device uses a VSC SDP to indicate the + * Pixel Encoding/Colorimetry Format and that a Sink device shall ignore MISC1, bit 7, + * and MISC0, bits 7:1 (MISC1, bit 7, and MISC0, bits 7:1, become "don't care"). 
+ */ + if (use_vsc_sdp_for_colorimetry) + misc1 = misc1 | 0x40; + else + misc1 = misc1 & ~0x40; + + /* set color depth */ + switch (hw_crtc_timing.display_color_depth) { + case COLOR_DEPTH_666: + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_6BPC; + break; + case COLOR_DEPTH_888: + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_8BPC; + break; + case COLOR_DEPTH_101010: + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_10BPC; + break; + case COLOR_DEPTH_121212: + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_12BPC; + break; + case COLOR_DEPTH_161616: + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_16BPC; + break; + default: + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_6BPC; + break; + } + + if (hw_crtc_timing.flags.DSC) { + // Fix set but not used error + //dp_pixel_encoding_type = 1; + switch (hw_crtc_timing.pixel_encoding) { + case PIXEL_ENCODING_YCBCR444: + dp_compressed_pixel_format = 0; + break; + case PIXEL_ENCODING_YCBCR422: + dp_compressed_pixel_format = 1; + if (hw_crtc_timing.dsc_cfg.ycbcr422_simple) + dp_compressed_pixel_format = 0; + break; + case PIXEL_ENCODING_YCBCR420: + dp_compressed_pixel_format = 1; + break; + default: + dp_compressed_pixel_format = 0; + break; + } + } else { + // Fix set but not used error + //dp_pixel_encoding_type = 0; + switch (dp_pixel_encoding) { + case DP_PIXEL_ENCODING_TYPE_RGB444: + dp_translate_pixel_enc = 0; + break; + case DP_PIXEL_ENCODING_TYPE_YCBCR422: + dp_translate_pixel_enc = 1; + break; + case DP_PIXEL_ENCODING_TYPE_YCBCR444: + dp_translate_pixel_enc = 0; + break; + case DP_PIXEL_ENCODING_TYPE_Y_ONLY: + dp_translate_pixel_enc = 3; + break; + case DP_PIXEL_ENCODING_TYPE_YCBCR420: + dp_translate_pixel_enc = 2; + break; + default: + ASSERT(0); + break; + } + } + /* Set DP pixel encoding and component depth */ + REG_UPDATE_4(DP_PIXEL_FORMAT, + PIXEL_ENCODING_TYPE, hw_crtc_timing.flags.DSC ? 
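+	/* PIXEL_ENCODING_TYPE selects compressed (1, DSC) vs uncompressed (0).
+	 * UNCOMPRESSED_PIXEL_FORMAT carries the translation computed above
+	 * (0 = RGB/YCbCr444, 1 = YCbCr422, 2 = YCbCr420, 3 = Y-only), and
+	 * COMPRESSED_PIXEL_FORMAT the DSC mapping (0 = 444 or simple 422,
+	 * 1 = native 422/420).
+	 */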
1 : 0, + UNCOMPRESSED_PIXEL_FORMAT, dp_translate_pixel_enc, + UNCOMPRESSED_COMPONENT_DEPTH, dp_component_depth, + COMPRESSED_PIXEL_FORMAT, dp_compressed_pixel_format); + + /* set dynamic range and YCbCr range */ + + switch (hw_crtc_timing.display_color_depth) { + case COLOR_DEPTH_666: + colorimetry_bpc = 0; + break; + case COLOR_DEPTH_888: + colorimetry_bpc = 1; + break; + case COLOR_DEPTH_101010: + colorimetry_bpc = 2; + break; + case COLOR_DEPTH_121212: + colorimetry_bpc = 3; + break; + default: + colorimetry_bpc = 0; + break; + } + + misc0 = misc0 | synchronous_clock; + misc0 = colorimetry_bpc << 5; + + switch (output_color_space) { + case COLOR_SPACE_SRGB: + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + break; + case COLOR_SPACE_SRGB_LIMITED: + misc0 = misc0 | 0x8; /* bit3=1 */ + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + break; + case COLOR_SPACE_YCBCR601: + case COLOR_SPACE_YCBCR601_LIMITED: + misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */ + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) + misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ + else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) + misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ + break; + case COLOR_SPACE_YCBCR709: + case COLOR_SPACE_YCBCR709_LIMITED: + misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */ + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) + misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ + else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) + misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ + break; + case COLOR_SPACE_2020_RGB_LIMITEDRANGE: + case COLOR_SPACE_2020_RGB_FULLRANGE: + case COLOR_SPACE_2020_YCBCR: + case COLOR_SPACE_XR_RGB: + case COLOR_SPACE_MSREF_SCRGB: + case COLOR_SPACE_ADOBERGB: + case COLOR_SPACE_DCIP3: + case COLOR_SPACE_XV_YCC_709: + case COLOR_SPACE_XV_YCC_601: + case COLOR_SPACE_DISPLAYNATIVE: + case COLOR_SPACE_DOLBYVISION: + case COLOR_SPACE_APPCTRL: + case COLOR_SPACE_CUSTOMPOINTS: + case COLOR_SPACE_UNKNOWN: + case COLOR_SPACE_YCBCR709_BLACK: + /* do nothing */ + break; + } + + REG_SET(DP_MSA_COLORIMETRY, 0, DP_MSA_MISC0, misc0); + REG_WRITE(DP_MSA_MISC, misc1); /* MSA_MISC1 */ + + /* dcn new register + * dc_crtc_timing is vesa dmt struct. 
data from edid + */ + REG_SET_2(DP_MSA_TIMING_PARAM1, 0, + DP_MSA_HTOTAL, hw_crtc_timing.h_total, + DP_MSA_VTOTAL, hw_crtc_timing.v_total); + + /* calculate from vesa timing parameters + * h_active_start related to leading edge of sync + */ + + h_blank = hw_crtc_timing.h_total - hw_crtc_timing.h_border_left - + hw_crtc_timing.h_addressable - hw_crtc_timing.h_border_right; + + h_back_porch = h_blank - hw_crtc_timing.h_front_porch - + hw_crtc_timing.h_sync_width; + + /* start at beginning of left border */ + h_active_start = hw_crtc_timing.h_sync_width + h_back_porch; + + + v_active_start = hw_crtc_timing.v_total - hw_crtc_timing.v_border_top - + hw_crtc_timing.v_addressable - hw_crtc_timing.v_border_bottom - + hw_crtc_timing.v_front_porch; + + + /* start at beginning of left border */ + REG_SET_2(DP_MSA_TIMING_PARAM2, 0, + DP_MSA_HSTART, h_active_start, + DP_MSA_VSTART, v_active_start); + + REG_SET_4(DP_MSA_TIMING_PARAM3, 0, + DP_MSA_HSYNCWIDTH, + hw_crtc_timing.h_sync_width, + DP_MSA_HSYNCPOLARITY, + !hw_crtc_timing.flags.HSYNC_POSITIVE_POLARITY, + DP_MSA_VSYNCWIDTH, + hw_crtc_timing.v_sync_width, + DP_MSA_VSYNCPOLARITY, + !hw_crtc_timing.flags.VSYNC_POSITIVE_POLARITY); + + /* HWDITH include border or overscan */ + REG_SET_2(DP_MSA_TIMING_PARAM4, 0, + DP_MSA_HWIDTH, hw_crtc_timing.h_border_left + + hw_crtc_timing.h_addressable + hw_crtc_timing.h_border_right, + DP_MSA_VHEIGHT, hw_crtc_timing.v_border_top + + hw_crtc_timing.v_addressable + hw_crtc_timing.v_border_bottom); + + REG_UPDATE(DP_SEC_FRAMING4, + DP_SST_SDP_SPLITTING, enable_sdp_splitting); +} + +static void enc401_stream_encoder_map_to_link( + struct stream_encoder *enc, + uint32_t stream_enc_inst, + uint32_t link_enc_inst) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + REG_UPDATE(STREAM_MAPPER_CONTROL, + DIG_STREAM_LINK_TARGET, link_enc_inst); +} + +static const struct stream_encoder_funcs dcn401_str_enc_funcs = { + .dp_set_odm_combine = + enc401_dp_set_odm_combine, + .dp_set_stream_attribute = + enc401_stream_encoder_dp_set_stream_attribute, + .hdmi_set_stream_attribute = + enc401_stream_encoder_hdmi_set_stream_attribute, + .dvi_set_stream_attribute = + enc401_stream_encoder_dvi_set_stream_attribute, + .set_throttled_vcp_size = + enc1_stream_encoder_set_throttled_vcp_size, + .update_hdmi_info_packets = + enc3_stream_encoder_update_hdmi_info_packets, + .stop_hdmi_info_packets = + enc3_stream_encoder_stop_hdmi_info_packets, + .update_dp_info_packets_sdp_line_num = + enc3_stream_encoder_update_dp_info_packets_sdp_line_num, + .update_dp_info_packets = + enc3_stream_encoder_update_dp_info_packets, + .stop_dp_info_packets = + enc1_stream_encoder_stop_dp_info_packets, + .dp_blank = + enc1_stream_encoder_dp_blank, + .dp_unblank = + enc401_stream_encoder_dp_unblank, + .audio_mute_control = enc3_audio_mute_control, + + .dp_audio_setup = enc3_se_dp_audio_setup, + .dp_audio_enable = enc3_se_dp_audio_enable, + .dp_audio_disable = enc1_se_dp_audio_disable, + + .hdmi_audio_setup = enc3_se_hdmi_audio_setup, + .hdmi_audio_disable = enc1_se_hdmi_audio_disable, + .setup_stereo_sync = enc1_setup_stereo_sync, + .set_avmute = enc1_stream_encoder_set_avmute, + .dig_connect_to_otg = enc1_dig_connect_to_otg, + .dig_source_otg = enc1_dig_source_otg, + + .dp_get_pixel_format = enc1_stream_encoder_dp_get_pixel_format, + + .enc_read_state = enc401_read_state, + .dp_set_dsc_config = enc401_dp_set_dsc_config, + .dp_set_dsc_pps_info_packet = enc3_dp_set_dsc_pps_info_packet, + .set_dynamic_metadata = enc401_set_dynamic_metadata, + 
.hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute, + .dig_stream_enable = enc401_stream_encoder_enable, + + .set_input_mode = enc401_set_dig_input_mode, + .enable_fifo = enc32_enable_fifo, + .map_stream_to_link = enc401_stream_encoder_map_to_link, +}; + +void dcn401_dio_stream_encoder_construct( + struct dcn10_stream_encoder *enc1, + struct dc_context *ctx, + struct dc_bios *bp, + enum engine_id eng_id, + struct vpg *vpg, + struct afmt *afmt, + const struct dcn10_stream_enc_registers *regs, + const struct dcn10_stream_encoder_shift *se_shift, + const struct dcn10_stream_encoder_mask *se_mask) +{ + enc1->base.funcs = &dcn401_str_enc_funcs; + enc1->base.ctx = ctx; + enc1->base.id = eng_id; + enc1->base.bp = bp; + enc1->base.vpg = vpg; + enc1->base.afmt = afmt; + enc1->regs = regs; + enc1->se_shift = se_shift; + enc1->se_mask = se_mask; + enc1->base.stream_enc_inst = vpg->inst; +} + +void enc401_set_dynamic_metadata(struct stream_encoder *enc, + bool enable_dme, + uint32_t hubp_requestor_id, + enum dynamic_metadata_mode dmdata_mode) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + if (enable_dme) { + REG_UPDATE_2(DME_CONTROL, + METADATA_HUBP_REQUESTOR_ID, hubp_requestor_id, + METADATA_STREAM_TYPE, (dmdata_mode == dmdata_dolby_vision) ? 1 : 0); + + /* Use default line reference DP_SOF for bringup. + * Should use OTG_SOF for DRR cases + */ + if (dmdata_mode == dmdata_dp) + REG_UPDATE_3(DP_SEC_METADATA_TRANSMISSION, + DP_SEC_METADATA_PACKET_ENABLE, 1, + DP_SEC_METADATA_PACKET_LINE_REFERENCE, 0, + DP_SEC_METADATA_PACKET_LINE, 20); + else { + REG_UPDATE_3(HDMI_METADATA_PACKET_CONTROL, + HDMI_METADATA_PACKET_ENABLE, 1, + HDMI_METADATA_PACKET_LINE_REFERENCE, 0, + HDMI_METADATA_PACKET_LINE, 2); + + if (dmdata_mode == dmdata_dolby_vision) + REG_UPDATE(HDMI_CONTROL, + DOLBY_VISION_EN, 1); + } + + REG_UPDATE(DME_CONTROL, + METADATA_ENGINE_EN, 1); + } else { + REG_UPDATE(DME_CONTROL, + METADATA_ENGINE_EN, 0); + + if (dmdata_mode == dmdata_dp) + REG_UPDATE(DP_SEC_METADATA_TRANSMISSION, + DP_SEC_METADATA_PACKET_ENABLE, 0); + else { + REG_UPDATE(HDMI_METADATA_PACKET_CONTROL, + HDMI_METADATA_PACKET_ENABLE, 0); + REG_UPDATE(HDMI_CONTROL, + DOLBY_VISION_EN, 0); + } + } +} +void enc401_stream_encoder_set_stream_attribute_helper( + struct dcn10_stream_encoder *enc1, + struct dc_crtc_timing *crtc_timing) +{ + switch (crtc_timing->pixel_encoding) { + case PIXEL_ENCODING_YCBCR422: + REG_UPDATE(HDMI_CONTROL, TMDS_PIXEL_ENCODING, 1); + break; + default: + REG_UPDATE(HDMI_CONTROL, TMDS_PIXEL_ENCODING, 0); + break; + } + REG_UPDATE(HDMI_CONTROL, TMDS_COLOR_FORMAT, 0); +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_stream_encoder.h new file mode 100644 index 000000000000..d751839598f8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_dio_stream_encoder.h @@ -0,0 +1,217 @@ +/* + * Copyright 2021 - Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DIO_STREAM_ENCODER_DCN401_H__ +#define __DC_DIO_STREAM_ENCODER_DCN401_H__ + +#include "dcn30/dcn30_vpg.h" +#include "dcn30/dcn30_afmt.h" +#include "stream_encoder.h" +#include "dcn20/dcn20_stream_encoder.h" + +#define SE_COMMON_MASK_SH_LIST_DCN401(mask_sh)\ + SE_SF(DP0_DP_PIXEL_FORMAT, PIXEL_ENCODING_TYPE, mask_sh),\ + SE_SF(DP0_DP_PIXEL_FORMAT, UNCOMPRESSED_PIXEL_FORMAT, mask_sh),\ + SE_SF(DP0_DP_PIXEL_FORMAT, UNCOMPRESSED_COMPONENT_DEPTH, mask_sh),\ + SE_SF(DP0_DP_PIXEL_FORMAT, COMPRESSED_PIXEL_FORMAT, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_DATA_SCRAMBLE_EN, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_NO_EXTRA_NULL_PACKET_FILLED, mask_sh),\ + SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\ + SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_X, mask_sh),\ + SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_Y, mask_sh),\ + SE_SF(DP0_DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP1_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\ + SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\ + SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\ + SE_SF(DP0_DP_VID_STREAM_CNTL, 
DP_VID_STREAM_STATUS, mask_sh),\ + SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_RESET, mask_sh),\ + SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_ENABLE, mask_sh),\ + SE_SF(DP0_DP_VID_TIMING, DP_VID_M_N_GEN_EN, mask_sh),\ + SE_SF(DP0_DP_VID_N, DP_VID_N, mask_sh),\ + SE_SF(DP0_DP_VID_M, DP_VID_M, mask_sh),\ + SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUDIO_PRIORITY, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_32_0, HDMI_ACR_CTS_32, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_32_1, HDMI_ACR_N_32, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_44_0, HDMI_ACR_CTS_44, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_44_1, HDMI_ACR_N_44, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\ + SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\ + SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\ + SE_SF(DIG0_AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_CLOCK_CHANNEL_RATE, mask_sh),\ + SE_SF(DIG1_HDMI_CONTROL, TMDS_PIXEL_ENCODING, mask_sh),\ + SE_SF(DIG1_HDMI_CONTROL, TMDS_COLOR_FORMAT, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL6, DP_SEC_GSP7_LINE_NUM, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP11_PPS, mask_sh),\ + SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, mask_sh),\ + SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, mask_sh),\ + SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\ + SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_HTOTAL, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_VTOTAL, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_HSTART, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_VSTART, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCWIDTH, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCPOLARITY, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCWIDTH, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCPOLARITY, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\ + SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\ + SE_SF(DP0_DP_VID_TIMING, DP_VID_N_INTERVAL, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh), \ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_CONT, mask_sh),\ + 
SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC0_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC1_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC2_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC3_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC4_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC5_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC6_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC7_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC8_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC9_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC10_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC11_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC12_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC13_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL10, HDMI_GENERIC14_LINE, mask_sh),\ + SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, mask_sh),\ + SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, mask_sh),\ + SE_SF(DME0_DME_CONTROL, METADATA_ENGINE_EN, mask_sh),\ + SE_SF(DME0_DME_CONTROL, METADATA_HUBP_REQUESTOR_ID, mask_sh),\ + SE_SF(DME0_DME_CONTROL, METADATA_STREAM_TYPE, mask_sh),\ + SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE_REFERENCE, mask_sh),\ + SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_ENABLE, mask_sh),\ + SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE_REFERENCE, 
mask_sh),\ + SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, DOLBY_VISION_EN, mask_sh),\ + SE_SF(DIG0_DIG_FE_EN_CNTL, DIG_FE_ENABLE, mask_sh),\ + SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_MODE, mask_sh),\ + SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, mask_sh),\ + SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SOFT_RESET, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\ + SE_SF(DP0_DP_SEC_FRAMING4, DP_SST_SDP_SPLITTING, mask_sh),\ + SE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\ + SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, mask_sh),\ + SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, mask_sh),\ + SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, mask_sh),\ + SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET, mask_sh),\ + SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, mask_sh),\ + SE_SF(DIG0_STREAM_MAPPER_CONTROL, DIG_STREAM_LINK_TARGET, mask_sh), + + +void dcn401_dio_stream_encoder_construct( + struct dcn10_stream_encoder *enc1, + struct dc_context *ctx, + struct dc_bios *bp, + enum engine_id eng_id, + struct vpg *vpg, + struct afmt *afmt, + const struct dcn10_stream_enc_registers *regs, + const struct dcn10_stream_encoder_shift *se_shift, + const struct dcn10_stream_encoder_mask *se_mask); + +void enc401_set_dynamic_metadata(struct stream_encoder *enc, + bool enable_dme, + uint32_t hubp_requestor_id, + enum dynamic_metadata_mode dmdata_mode); +void enc401_stream_encoder_set_stream_attribute_helper( + struct dcn10_stream_encoder *enc1, + struct dc_crtc_timing *crtc_timing); +void enc401_stream_encoder_dp_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + enum dc_color_space output_color_space, + bool use_vsc_sdp_for_colorimetry, + uint32_t enable_sdp_splitting); +#endif /* __DC_DIO_STREAM_ENCODER_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubbub.c new file mode 100644 index 000000000000..597817b51228 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubbub.c @@ -0,0 +1,933 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + + +#include "dcn30/dcn30_hubbub.h" +#include "dcn401_hubbub.h" +#include "dm_services.h" +#include "reg_helper.h" + + +#define CTX \ + hubbub2->base.ctx +#define DC_LOGGER \ + hubbub2->base.ctx->logger +#define REG(reg)\ + hubbub2->regs->reg + +#undef FN +#define FN(reg_name, field_name) \ + hubbub2->shifts->field_name, hubbub2->masks->field_name + +static void dcn401_init_crb(struct hubbub *hubbub) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + REG_GET(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, + &hubbub2->det0_size); + + REG_GET(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, + &hubbub2->det1_size); + + REG_GET(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, + &hubbub2->det2_size); + + REG_GET(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, + &hubbub2->det3_size); + + REG_GET(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE_CURRENT, + &hubbub2->compbuf_size_segments); + + REG_SET(COMPBUF_RESERVED_SPACE, 0, + COMPBUF_RESERVED_SPACE_64B, hubbub2->pixel_chunk_size / 32); // 256 64Bytes +} + +bool hubbub401_program_urgent_watermarks( + struct hubbub *hubbub, + union dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + bool wm_pending = false; + + /* Repeat for water mark set A and B */ + /* clock state A */ + if (safe_to_lower || watermarks->dcn4.a.urgent > hubbub2->watermarks.dcn4.a.urgent) { + hubbub2->watermarks.dcn4.a.urgent = watermarks->dcn4.a.urgent; + REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0, + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, watermarks->dcn4.a.urgent); + DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->dcn4.a.urgent, watermarks->dcn4.a.urgent); + } else if (watermarks->dcn4.a.urgent < hubbub2->watermarks.dcn4.a.urgent) + wm_pending = true; + + /* determine the transfer time for a quantity of data for a particular requestor.*/ + if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_flip + > hubbub2->watermarks.dcn4.a.frac_urg_bw_flip) { + hubbub2->watermarks.dcn4.a.frac_urg_bw_flip = watermarks->dcn4.a.frac_urg_bw_flip; + REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, 0, + DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, watermarks->dcn4.a.frac_urg_bw_flip); + } else if (watermarks->dcn4.a.frac_urg_bw_flip + < hubbub2->watermarks.dcn4.a.frac_urg_bw_flip) + wm_pending = true; + + if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_nom + > hubbub2->watermarks.dcn4.a.frac_urg_bw_nom) { + hubbub2->watermarks.dcn4.a.frac_urg_bw_nom = watermarks->dcn4.a.frac_urg_bw_nom; + REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, 0, + DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->dcn4.a.frac_urg_bw_nom); + } else if (watermarks->dcn4.a.frac_urg_bw_nom + < hubbub2->watermarks.dcn4.a.frac_urg_bw_nom) + wm_pending = true; + + if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_mall + > hubbub2->watermarks.dcn4.a.frac_urg_bw_mall) { + hubbub2->watermarks.dcn4.a.frac_urg_bw_mall = watermarks->dcn4.a.frac_urg_bw_mall; + REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, 0, + DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, watermarks->dcn4.a.frac_urg_bw_mall); + } else if (watermarks->dcn4.a.frac_urg_bw_mall < hubbub2->watermarks.dcn4.a.frac_urg_bw_mall) + wm_pending = true; + + if (safe_to_lower || watermarks->dcn4.a.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem) { + hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem = watermarks->dcn4.a.refcyc_per_trip_to_mem; + REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 0, + DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 
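+		/* As with the other watermarks in this function, a higher value
+		 * is programmed right away; a lower one is written only when
+		 * safe_to_lower, otherwise wm_pending records the deferred
+		 * decrease.
+		 */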
watermarks->dcn4.a.refcyc_per_trip_to_mem); + } else if (watermarks->dcn4.a.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem) + wm_pending = true; + + if (safe_to_lower || watermarks->dcn4.a.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem) { + hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem = watermarks->dcn4.a.refcyc_per_meta_trip_to_mem; + REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, 0, + DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, watermarks->dcn4.a.refcyc_per_meta_trip_to_mem); + } else if (watermarks->dcn4.a.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem) + wm_pending = true; + + + /* clock state B */ + if (safe_to_lower || watermarks->dcn4.b.urgent > hubbub2->watermarks.dcn4.b.urgent) { + hubbub2->watermarks.dcn4.b.urgent = watermarks->dcn4.b.urgent; + REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, 0, + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, watermarks->dcn4.b.urgent); + DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->dcn4.b.urgent, watermarks->dcn4.b.urgent); + } else if (watermarks->dcn4.b.urgent < hubbub2->watermarks.dcn4.b.urgent) + wm_pending = true; + + /* determine the transfer time for a quantity of data for a particular requestor.*/ + if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_flip + > hubbub2->watermarks.dcn4.b.frac_urg_bw_flip) { + hubbub2->watermarks.dcn4.b.frac_urg_bw_flip = watermarks->dcn4.b.frac_urg_bw_flip; + REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, 0, + DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, watermarks->dcn4.b.frac_urg_bw_flip); + } else if (watermarks->dcn4.b.frac_urg_bw_flip + < hubbub2->watermarks.dcn4.b.frac_urg_bw_flip) + wm_pending = true; + + if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_nom + > hubbub2->watermarks.dcn4.b.frac_urg_bw_nom) { + hubbub2->watermarks.dcn4.b.frac_urg_bw_nom = watermarks->dcn4.b.frac_urg_bw_nom; + REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, 0, + DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->dcn4.b.frac_urg_bw_nom); + } else if (watermarks->dcn4.b.frac_urg_bw_nom + < hubbub2->watermarks.dcn4.b.frac_urg_bw_nom) + wm_pending = true; + + if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_mall + > hubbub2->watermarks.dcn4.b.frac_urg_bw_mall) { + hubbub2->watermarks.dcn4.b.frac_urg_bw_mall = watermarks->dcn4.b.frac_urg_bw_mall; + REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, 0, + DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, watermarks->dcn4.b.frac_urg_bw_mall); + } else if (watermarks->dcn4.b.frac_urg_bw_mall < hubbub2->watermarks.dcn4.b.frac_urg_bw_mall) + wm_pending = true; + + if (safe_to_lower || watermarks->dcn4.b.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem) { + hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem = watermarks->dcn4.b.refcyc_per_trip_to_mem; + REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, 0, + DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, watermarks->dcn4.b.refcyc_per_trip_to_mem); + } else if (watermarks->dcn4.b.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem) + wm_pending = true; + + if (safe_to_lower || watermarks->dcn4.b.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem) { + hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem = watermarks->dcn4.b.refcyc_per_meta_trip_to_mem; + REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, 0, + DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, watermarks->dcn4.b.refcyc_per_meta_trip_to_mem); + } else if (watermarks->dcn4.b.refcyc_per_meta_trip_to_mem < 
hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem) + wm_pending = true; + + return wm_pending; +} + +bool hubbub401_program_stutter_watermarks( + struct hubbub *hubbub, + union dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + bool wm_pending = false; + + /* clock state A */ + if (safe_to_lower || watermarks->dcn4.a.sr_enter + > hubbub2->watermarks.dcn4.a.sr_enter) { + hubbub2->watermarks.dcn4.a.sr_enter = + watermarks->dcn4.a.sr_enter; + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, watermarks->dcn4.a.sr_enter); + DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->dcn4.a.sr_enter, watermarks->dcn4.a.sr_enter); + // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm A with the same value + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, watermarks->dcn4.a.sr_enter); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, watermarks->dcn4.a.sr_enter); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, watermarks->dcn4.a.sr_enter); + + } else if (watermarks->dcn4.a.sr_enter + < hubbub2->watermarks.dcn4.a.sr_enter) + wm_pending = true; + + if (safe_to_lower || watermarks->dcn4.a.sr_exit + > hubbub2->watermarks.dcn4.a.sr_exit) { + hubbub2->watermarks.dcn4.a.sr_exit = + watermarks->dcn4.a.sr_exit; + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, watermarks->dcn4.a.sr_exit); + DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->dcn4.a.sr_exit, watermarks->dcn4.a.sr_exit); + // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm A with the same value + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, watermarks->dcn4.a.sr_exit); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, watermarks->dcn4.a.sr_exit); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, watermarks->dcn4.a.sr_exit); + + } else if (watermarks->dcn4.a.sr_exit + < hubbub2->watermarks.dcn4.a.sr_exit) + wm_pending = true; + + /* clock state B */ + if (safe_to_lower || watermarks->dcn4.b.sr_enter + > hubbub2->watermarks.dcn4.b.sr_enter) { + hubbub2->watermarks.dcn4.b.sr_enter = + watermarks->dcn4.b.sr_enter; + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, watermarks->dcn4.b.sr_enter); + DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->dcn4.b.sr_enter, watermarks->dcn4.b.sr_enter); + // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm A with the same value + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, watermarks->dcn4.b.sr_enter); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, watermarks->dcn4.b.sr_enter); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, watermarks->dcn4.b.sr_enter); + + } else if (watermarks->dcn4.b.sr_enter + < hubbub2->watermarks.dcn4.b.sr_enter) + wm_pending = true; + + if (safe_to_lower || watermarks->dcn4.b.sr_exit + > 
hubbub2->watermarks.dcn4.b.sr_exit) { + hubbub2->watermarks.dcn4.b.sr_exit = + watermarks->dcn4.b.sr_exit; + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, watermarks->dcn4.b.sr_exit); + DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->dcn4.b.sr_exit, watermarks->dcn4.b.sr_exit); + // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm A with the same value + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, watermarks->dcn4.b.sr_exit); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, watermarks->dcn4.b.sr_exit); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, watermarks->dcn4.b.sr_exit); + + } else if (watermarks->dcn4.b.sr_exit + < hubbub2->watermarks.dcn4.b.sr_exit) + wm_pending = true; + + return wm_pending; +} + + +bool hubbub401_program_pstate_watermarks( + struct hubbub *hubbub, + union dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + bool wm_pending = false; + + /* Section for UCLK_PSTATE_CHANGE_WATERMARKS */ + /* clock state A */ + if (safe_to_lower || watermarks->dcn4.a.uclk_pstate + > hubbub2->watermarks.dcn4.a.uclk_pstate) { + hubbub2->watermarks.dcn4.a.uclk_pstate = + watermarks->dcn4.a.uclk_pstate; + REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, 0, + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4.a.uclk_pstate); + DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->dcn4.a.uclk_pstate, watermarks->dcn4.a.uclk_pstate); + } else if (watermarks->dcn4.a.uclk_pstate + < hubbub2->watermarks.dcn4.a.uclk_pstate) + wm_pending = true; + + /* clock state B */ + if (safe_to_lower || watermarks->dcn4.b.uclk_pstate + > hubbub2->watermarks.dcn4.b.uclk_pstate) { + hubbub2->watermarks.dcn4.b.uclk_pstate = + watermarks->dcn4.b.uclk_pstate; + REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, 0, + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4.b.uclk_pstate); + DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->dcn4.b.uclk_pstate, watermarks->dcn4.b.uclk_pstate); + } else if (watermarks->dcn4.b.uclk_pstate + < hubbub2->watermarks.dcn4.b.uclk_pstate) + wm_pending = true; + + /* Section for UCLK_PSTATE_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT) */ + if (safe_to_lower || watermarks->dcn4.a.temp_read_or_ppt + > hubbub2->watermarks.dcn4.a.temp_read_or_ppt) { + hubbub2->watermarks.dcn4.a.temp_read_or_ppt = + watermarks->dcn4.a.temp_read_or_ppt; + REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, 0, + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4.a.temp_read_or_ppt); + DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_A calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->dcn4.a.temp_read_or_ppt, watermarks->dcn4.a.temp_read_or_ppt); + } else if (watermarks->dcn4.a.temp_read_or_ppt + < hubbub2->watermarks.dcn4.a.temp_read_or_ppt) + wm_pending = true; + + /* clock state B */ + if (safe_to_lower || watermarks->dcn4.b.temp_read_or_ppt + > hubbub2->watermarks.dcn4.b.temp_read_or_ppt) { + hubbub2->watermarks.dcn4.b.temp_read_or_ppt = + watermarks->dcn4.b.temp_read_or_ppt; + REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, 0, + 
DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4.b.temp_read_or_ppt); + DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_B calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->dcn4.b.temp_read_or_ppt, watermarks->dcn4.b.temp_read_or_ppt); + } else if (watermarks->dcn4.b.temp_read_or_ppt + < hubbub2->watermarks.dcn4.b.temp_read_or_ppt) + wm_pending = true; + + /* Section for FCLK_PSTATE_CHANGE_WATERMARKS */ + /* clock state A */ + if (safe_to_lower || watermarks->dcn4.a.fclk_pstate + > hubbub2->watermarks.dcn4.a.fclk_pstate) { + hubbub2->watermarks.dcn4.a.fclk_pstate = + watermarks->dcn4.a.fclk_pstate; + REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, 0, + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4.a.fclk_pstate); + DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->dcn4.a.fclk_pstate, watermarks->dcn4.a.fclk_pstate); + } else if (watermarks->dcn4.a.fclk_pstate + < hubbub2->watermarks.dcn4.a.fclk_pstate) + wm_pending = true; + + /* clock state B */ + if (safe_to_lower || watermarks->dcn4.b.fclk_pstate + > hubbub2->watermarks.dcn4.b.fclk_pstate) { + hubbub2->watermarks.dcn4.b.fclk_pstate = + watermarks->dcn4.b.fclk_pstate; + REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, 0, + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4.b.fclk_pstate); + DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->dcn4.b.fclk_pstate, watermarks->dcn4.b.fclk_pstate); + } else if (watermarks->dcn4.b.fclk_pstate + < hubbub2->watermarks.dcn4.b.fclk_pstate) + wm_pending = true; + + /* Section for FCLK_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT) */ + if (safe_to_lower || watermarks->dcn4.a.temp_read_or_ppt + > hubbub2->watermarks.dcn4.a.temp_read_or_ppt) { + hubbub2->watermarks.dcn4.a.temp_read_or_ppt = + watermarks->dcn4.a.temp_read_or_ppt; + REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, 0, + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4.a.temp_read_or_ppt); + DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_A calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->dcn4.a.temp_read_or_ppt, watermarks->dcn4.a.temp_read_or_ppt); + } else if (watermarks->dcn4.a.temp_read_or_ppt + < hubbub2->watermarks.dcn4.a.temp_read_or_ppt) + wm_pending = true; + + /* clock state B */ + if (safe_to_lower || watermarks->dcn4.b.temp_read_or_ppt + > hubbub2->watermarks.dcn4.b.temp_read_or_ppt) { + hubbub2->watermarks.dcn4.b.temp_read_or_ppt = + watermarks->dcn4.b.temp_read_or_ppt; + REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, 0, + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4.b.temp_read_or_ppt); + DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_B calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->dcn4.b.temp_read_or_ppt, watermarks->dcn4.b.temp_read_or_ppt); + } else if (watermarks->dcn4.b.temp_read_or_ppt + < hubbub2->watermarks.dcn4.b.temp_read_or_ppt) + wm_pending = true; + + return wm_pending; +} + + +bool hubbub401_program_usr_watermarks( + struct hubbub *hubbub, + union dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + bool wm_pending = false; + + /* clock state A */ + if (safe_to_lower || watermarks->dcn4.a.usr + > hubbub2->watermarks.dcn4.a.usr) { + hubbub2->watermarks.dcn4.a.usr = watermarks->dcn4.a.usr; + REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, 0, + 
DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, watermarks->dcn4.a.usr); + DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->dcn4.a.usr, watermarks->dcn4.a.usr); + } else if (watermarks->dcn4.a.usr + < hubbub2->watermarks.dcn4.a.usr) + wm_pending = true; + + /* clock state B */ + if (safe_to_lower || watermarks->dcn4.b.usr + > hubbub2->watermarks.dcn4.b.usr) { + hubbub2->watermarks.dcn4.b.usr = watermarks->dcn4.b.usr; + REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, 0, + DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, watermarks->dcn4.b.usr); + DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->dcn4.b.usr, watermarks->dcn4.b.usr); + } else if (watermarks->dcn4.b.usr + < hubbub2->watermarks.dcn4.b.usr) + wm_pending = true; + + return wm_pending; +} + + +static bool hubbub401_program_watermarks( + struct hubbub *hubbub, + union dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower) +{ + bool wm_pending = false; + + if (hubbub401_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower)) + wm_pending = true; + + if (hubbub401_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower)) + wm_pending = true; + + if (hubbub401_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower)) + wm_pending = true; + + if (hubbub401_program_usr_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower)) + wm_pending = true; + + /* + * The DCHub arbiter has a mechanism to dynamically rate limit the DCHub request stream to the fabric. + * If the memory controller is fully utilized and the DCHub requestors are + * well ahead of their amortized schedule, then it is safe to prevent the next winner + * from being committed and sent to the fabric. + * The utilization of the memory controller is approximated by ensuring that + * the number of outstanding requests is greater than a threshold specified + * by the ARB_MIN_REQ_OUTSTANDING. To determine that the DCHub requestors are well ahead of the amortized + * schedule, the slack of the next winner is compared with the ARB_SAT_LEVEL in DLG RefClk cycles. + * + * TODO: Revisit request limit after figure out right number. request limit for RM isn't decided yet, + * set maximum value (0x1FF) to turn off it for now. 
+ */ + /*REG_SET(DCHUBBUB_ARB_SAT_LEVEL, 0, + DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz); + REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, + DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF); + */ + + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + + hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); + + return wm_pending; +} + +/* Copy values from WM set A to all other sets */ +static void hubbub401_init_watermarks(struct hubbub *hubbub) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + uint32_t reg; + + reg = REG_READ(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A); + REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, reg); + + reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A); + REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, reg); + + reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A); + REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, reg); + + reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A); + REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, reg); + + reg = REG_READ(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A); + REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, reg); + + reg = REG_READ(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A); + REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, reg); + + reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, reg); + + reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, reg); + REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, reg); + + reg = REG_READ(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A); + REG_WRITE(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, reg); + + reg = REG_READ(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A); + REG_WRITE(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, reg); + reg = REG_READ(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A); + REG_WRITE(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, reg); + + reg = REG_READ(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A); + REG_WRITE(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, reg); + reg = REG_READ(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A); + REG_WRITE(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, reg); +} + +static void hubbub401_wm_read_state(struct hubbub *hubbub, + struct dcn_hubbub_wm *wm) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + struct dcn_hubbub_wm_set *s; + + memset(wm, 0, sizeof(struct dcn_hubbub_wm)); + + s = &wm->sets[0]; + s->wm_set = 0; + REG_GET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, &s->data_urgent); + + REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, &s->sr_enter); + + REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, &s->sr_exit); + + REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, &s->dram_clk_change); + + 
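+	/* Read-back of the set A watermarks programmed by
+	 * hubbub401_program_watermarks, used for hubbub state logging/debug;
+	 * set B follows below.
+	 */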
REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, + DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, &s->usr_retrain); + + REG_GET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, &s->fclk_pstate_change); + + s = &wm->sets[1]; + s->wm_set = 1; + REG_GET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, &s->data_urgent); + + REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, &s->sr_enter); + + REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, &s->sr_exit); + + REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, &s->dram_clk_change); + + REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, + DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, &s->usr_retrain); + + REG_GET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, &s->fclk_pstate_change); +} + +bool hubbub401_dcc_support_swizzle( + enum swizzle_mode_addr3_values swizzle, + unsigned int plane_pitch, + unsigned int bytes_per_element, + enum segment_order *segment_order_horz, + enum segment_order *segment_order_vert) +{ + bool swizzle_supported = false; + + switch (swizzle) { + case DC_ADDR3_SW_LINEAR: + if ((plane_pitch * bytes_per_element) % 256 == 0) + swizzle_supported = true; + break; + case DC_ADDR3_SW_64KB_2D: + case DC_ADDR3_SW_256KB_2D: + swizzle_supported = true; + break; + default: + swizzle_supported = false; + break; + } + + if (swizzle_supported) { + if (bytes_per_element == 1) { + *segment_order_horz = segment_order__contiguous; + *segment_order_vert = segment_order__non_contiguous; + return true; + } + if (bytes_per_element == 2) { + *segment_order_horz = segment_order__non_contiguous; + *segment_order_vert = segment_order__contiguous; + return true; + } + if (bytes_per_element == 4) { + *segment_order_horz = segment_order__contiguous; + *segment_order_vert = segment_order__non_contiguous; + return true; + } + if (bytes_per_element == 8) { + *segment_order_horz = segment_order__contiguous; + *segment_order_vert = segment_order__non_contiguous; + return true; + } + } + + return false; +} + +bool hubbub401_dcc_support_pixel_format( + enum surface_pixel_format format, + unsigned int *plane0_bpe, + unsigned int *plane1_bpe) +{ + switch (format) { + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + *plane0_bpe = 2; + *plane1_bpe = 0; + return true; + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + *plane0_bpe = 1; + *plane1_bpe = 2; + return true; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS: + case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX: + case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FIX: + case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT: + case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FLOAT: + case SURFACE_PIXEL_FORMAT_GRPH_RGBE: + *plane0_bpe = 4; + *plane1_bpe = 0; + return true; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + *plane0_bpe = 4; + *plane1_bpe = 1; + return true; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + *plane0_bpe = 2; + *plane1_bpe = 4; + return true; + case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010: + case SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102: + case 
SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888: + *plane0_bpe = 4; + *plane1_bpe = 0; + return true; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + *plane0_bpe = 8; + *plane1_bpe = 0; + return true; + default: + return false; + } +} + +void hubbub401_get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height, + unsigned int bytes_per_element) +{ + if (bytes_per_element == 1) { + *blk256_width = 16; + *blk256_height = 16; + } else if (bytes_per_element == 2) { + *blk256_width = 16; + *blk256_height = 8; + } else if (bytes_per_element == 4) { + *blk256_width = 8; + *blk256_height = 8; + } else if (bytes_per_element == 8) { + *blk256_width = 8; + *blk256_height = 4; + } +} + +void hubbub401_det_request_size( + unsigned int detile_buf_size, + enum surface_pixel_format format, + unsigned int p0_height, + unsigned int p0_width, + unsigned int p0_bpe, + unsigned int p1_height, + unsigned int p1_width, + unsigned int p1_bpe, + bool *p0_req128_horz_wc, + bool *p0_req128_vert_wc, + bool *p1_req128_horz_wc, + bool *p1_req128_vert_wc) +{ + unsigned int blk256_height = 0; + unsigned int blk256_width = 0; + unsigned int p0_swath_bytes_horz_wc, p0_swath_bytes_vert_wc; + unsigned int p1_swath_bytes_horz_wc, p1_swath_bytes_vert_wc; + + //For plane0 + hubbub401_get_blk256_size(&blk256_width, &blk256_height, p0_bpe); + + p0_swath_bytes_horz_wc = p0_width * blk256_height * p0_bpe; + p0_swath_bytes_vert_wc = p0_height * blk256_width * p0_bpe; + + *p0_req128_horz_wc = (2 * p0_swath_bytes_horz_wc <= detile_buf_size) ? + false : /* full 256B request */ + true; /* half 128b request */ + + *p0_req128_vert_wc = (2 * p0_swath_bytes_vert_wc <= detile_buf_size) ? + false : /* full 256B request */ + true; /* half 128b request */ + + /*For dual planes needs to be considered together */ + if (p1_bpe) { + hubbub401_get_blk256_size(&blk256_width, &blk256_height, p1_bpe); + + p1_swath_bytes_horz_wc = p1_width * blk256_height * p1_bpe; + p1_swath_bytes_vert_wc = p1_height * blk256_width * p1_bpe; + + switch (format) { + default: + /* No any adjustment needed*/ + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + /* Packing at the ratio of 3:2 is supported before the detile buffer + * for YUV420 video with 10bpc (P010). Need to adjust for that. + */ + p0_swath_bytes_horz_wc = (((p0_swath_bytes_horz_wc * 2) / 3 + 255) / 256) * 256; + p0_swath_bytes_vert_wc = (((p0_swath_bytes_vert_wc * 2) / 3 + 255) / 256) * 256; + p1_swath_bytes_horz_wc = (((p1_swath_bytes_horz_wc * 2) / 3 + 255) / 256) * 256; + p1_swath_bytes_vert_wc = (((p1_swath_bytes_vert_wc * 2) / 3 + 255) / 256) * 256; + break; + } + + *p0_req128_horz_wc = *p1_req128_horz_wc = (2 * p0_swath_bytes_horz_wc + + 2 * p1_swath_bytes_horz_wc <= detile_buf_size) ? + false : /* full 256B request */ + true; /* half 128B request */ + + *p0_req128_vert_wc = *p1_req128_vert_wc = (2 * p0_swath_bytes_vert_wc + + 2 * p1_swath_bytes_vert_wc <= detile_buf_size) ? 
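Editor's note, an illustrative calculation with made-up surface numbers, not taken from the patch: for a single-plane 1920 pixel wide ARGB8888 surface (4 bytes per element), hubbub401_get_blk256_size() above yields an 8x8 block, so one horizontal swath is 1920 * 8 * 4 = 61440 bytes. Two such swaths (122880 bytes) fit easily in a 320 KB DET (DCN4_01_DEFAULT_DET_SIZE in the header added further below), so full 256B requests are kept; only when twice the swath no longer fits does the logic fall back to 128B requests, and the dual-plane branch here additionally retries to keep 256B requests on at least one of the two planes.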
+ false : /* full 256B request */ + true; /* half 128B request */ + + /* If 128B requests are true, meaning 2 full swaths of data cannot fit + * in de-tile buffer, check if one plane can use 256B request while + * the other plane is using 128B requests + */ + if (*p0_req128_horz_wc) { + // If ratio around 1:1 between p0 and p1 try to recalulate if p0 can use 256B + if (p0_swath_bytes_horz_wc <= p1_swath_bytes_horz_wc + p1_swath_bytes_horz_wc / 2) { + + *p0_req128_horz_wc = (2 * p0_swath_bytes_horz_wc + p1_swath_bytes_horz_wc <= detile_buf_size) ? + false : /* full 256B request */ + true; /* half 128b request */ + + } else { + /* ratio about 2:1 between p0 and p1, try to recalulate if p1 can use 256B */ + *p1_req128_horz_wc = (p0_swath_bytes_horz_wc + 2 * p1_swath_bytes_horz_wc <= detile_buf_size) ? + false : /* full 256B request */ + true; /* half 128b request */ + } + } + + if (*p0_req128_vert_wc) { + // If ratio around 1:1 between p0 and p1 try to recalulate if p0 can use 256B + if (p0_swath_bytes_vert_wc <= p1_swath_bytes_vert_wc + p1_swath_bytes_vert_wc / 2) { + + *p0_req128_vert_wc = (2 * p0_swath_bytes_vert_wc + p1_swath_bytes_vert_wc <= detile_buf_size) ? + false : /* full 256B request */ + true; /* half 128b request */ + + } else { + /* ratio about 2:1 between p0 and p1, try to recalulate if p1 can use 256B */ + *p1_req128_vert_wc = (p0_swath_bytes_vert_wc + 2 * p1_swath_bytes_vert_wc <= detile_buf_size) ? + false : /* full 256B request */ + true; /* half 128b request */ + } + } + } +} + +static void dcn401_program_det_segments(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + switch (hubp_inst) { + case 0: + REG_UPDATE(DCHUBBUB_DET0_CTRL, + DET0_SIZE, det_buffer_size_seg); + hubbub2->det0_size = det_buffer_size_seg; + break; + case 1: + REG_UPDATE(DCHUBBUB_DET1_CTRL, + DET1_SIZE, det_buffer_size_seg); + hubbub2->det1_size = det_buffer_size_seg; + break; + case 2: + REG_UPDATE(DCHUBBUB_DET2_CTRL, + DET2_SIZE, det_buffer_size_seg); + hubbub2->det2_size = det_buffer_size_seg; + break; + case 3: + REG_UPDATE(DCHUBBUB_DET3_CTRL, + DET3_SIZE, det_buffer_size_seg); + hubbub2->det3_size = det_buffer_size_seg; + break; + default: + break; + } + if (hubbub2->det0_size + hubbub2->det1_size + hubbub2->det2_size + + hubbub2->det3_size + hubbub2->compbuf_size_segments > hubbub2->crb_size_segs) { + /* This may happen during seamless transition from ODM 2:1 to ODM4:1 */ + DC_LOG_WARNING("CRB Config Warning: DET size (%d,%d,%d,%d) + Compbuf size (%d) > CRB segments (%d)\n", + hubbub2->det0_size, hubbub2->det1_size, hubbub2->det2_size, hubbub2->det3_size, + hubbub2->compbuf_size_segments, hubbub2->crb_size_segs); + } +} + +static void dcn401_program_compbuf_segments(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + unsigned int cur_compbuf_size_seg = 0; + + if (safe_to_increase || compbuf_size_seg <= hubbub2->compbuf_size_segments) { + if (compbuf_size_seg > hubbub2->compbuf_size_segments) { + REG_WAIT(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, hubbub2->det0_size, 1, 100); + REG_WAIT(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, hubbub2->det1_size, 1, 100); + REG_WAIT(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, hubbub2->det2_size, 1, 100); + REG_WAIT(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, hubbub2->det3_size, 1, 100); + } + /* Should never be hit, if it is we have an erroneous hw config*/ + ASSERT(hubbub2->det0_size + hubbub2->det1_size + 
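Editor's note, worked numbers for the budget checked by the surrounding ASSERT: hubbub401_construct() further below computes crb_size_segs = config_return_buffer_size_kb / DCN4_01_CRB_SEGMENT_SIZE_KB, so assuming the resource code passes in the full DCN4_01_CRB_SIZE_KB of 1344 KB, the CRB holds 1344 / 64 = 21 segments, and the four per-pipe DET allocations plus the compressed buffer must stay within those 21 segments.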
hubbub2->det2_size + + hubbub2->det3_size + compbuf_size_seg <= hubbub2->crb_size_segs); + REG_UPDATE(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE, compbuf_size_seg); + hubbub2->compbuf_size_segments = compbuf_size_seg; +#ifdef DIAGS_BUILD + REG_GET(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR, &cur_compbuf_size_seg); + ASSERT(!cur_compbuf_size_seg); +#else + ASSERT(REG_GET(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR, &cur_compbuf_size_seg) && !cur_compbuf_size_seg); +#endif + } +} + +static const struct hubbub_funcs hubbub4_01_funcs = { + .update_dchub = hubbub2_update_dchub, + .init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx, + .init_vm_ctx = hubbub2_init_vm_ctx, + .dcc_support_swizzle_addr3 = hubbub401_dcc_support_swizzle, + .dcc_support_pixel_format_plane0_plane1 = hubbub401_dcc_support_pixel_format, + .wm_read_state = hubbub401_wm_read_state, + .get_dchub_ref_freq = hubbub2_get_dchub_ref_freq, + .program_watermarks = hubbub401_program_watermarks, + .allow_self_refresh_control = hubbub1_allow_self_refresh_control, + .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, + .verify_allow_pstate_change_high = NULL, + .force_wm_propagate_to_pipes = hubbub32_force_wm_propagate_to_pipes, + .force_pstate_change_control = hubbub3_force_pstate_change_control, + .init_watermarks = hubbub401_init_watermarks, + .init_crb = dcn401_init_crb, + .hubbub_read_state = hubbub2_read_state, + .force_usr_retraining_allow = hubbub32_force_usr_retraining_allow, + .set_request_limit = hubbub32_set_request_limit, + .program_det_segments = dcn401_program_det_segments, + .program_compbuf_segments = dcn401_program_compbuf_segments, +}; + +void hubbub401_construct(struct dcn20_hubbub *hubbub2, + struct dc_context *ctx, + const struct dcn_hubbub_registers *hubbub_regs, + const struct dcn_hubbub_shift *hubbub_shift, + const struct dcn_hubbub_mask *hubbub_mask, + int det_size_kb, + int pixel_chunk_size_kb, + int config_return_buffer_size_kb) +{ + hubbub2->base.ctx = ctx; + hubbub2->base.funcs = &hubbub4_01_funcs; + hubbub2->regs = hubbub_regs; + hubbub2->shifts = hubbub_shift; + hubbub2->masks = hubbub_mask; + + hubbub2->detile_buf_size = det_size_kb * 1024; + hubbub2->pixel_chunk_size = pixel_chunk_size_kb * 1024; + hubbub2->crb_size_segs = config_return_buffer_size_kb / DCN4_01_CRB_SEGMENT_SIZE_KB; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubbub.h new file mode 100644 index 000000000000..d8a57f64a70c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubbub.h @@ -0,0 +1,192 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_HUBBUB_DCN401_H__ +#define __DC_HUBBUB_DCN401_H__ + +#include "dcn32/dcn32_hubbub.h" + +#define DCN4_01_CRB_SIZE_KB 1344 +#define DCN4_01_DEFAULT_DET_SIZE 320 +#define DCN4_01_CRB_SEGMENT_SIZE_KB 64 + +#define HUBBUB_MASK_SH_LIST_DCN4_01(mask_sh)\ + HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, mask_sh), \ + HUBBUB_SF(DCHUBBUB_SOFT_RESET, DCHUBBUB_GLOBAL_SOFT_RESET, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_DONE_INTERRUPT_DISABLE, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_VALUE, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_SAT_LEVEL, DCHUBBUB_ARB_SAT_LEVEL, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MAX_REQ_OUTSTAND, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, 
DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HUBBUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE, mask_sh), \ + HUBBUB_SF(DCN_VM_FB_LOCATION_TOP, FB_TOP, mask_sh), \ + HUBBUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET, mask_sh), \ + HUBBUB_SF(DCN_VM_AGP_BOT, AGP_BOT, mask_sh), \ + HUBBUB_SF(DCN_VM_AGP_TOP, AGP_TOP, mask_sh), \ + HUBBUB_SF(DCN_VM_AGP_BASE, AGP_BASE, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_DEBUG_CTRL_0, DET_DEPTH, mask_sh),\ + HUBBUB_SF(DCHUBBUB_DET0_CTRL, DET0_SIZE, mask_sh),\ + HUBBUB_SF(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, mask_sh),\ + HUBBUB_SF(DCHUBBUB_DET1_CTRL, DET1_SIZE, mask_sh),\ + HUBBUB_SF(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, mask_sh),\ + HUBBUB_SF(DCHUBBUB_DET2_CTRL, DET2_SIZE, mask_sh),\ + HUBBUB_SF(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, mask_sh),\ + HUBBUB_SF(DCHUBBUB_DET3_CTRL, DET3_SIZE, mask_sh),\ + HUBBUB_SF(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, mask_sh),\ + HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE, mask_sh),\ + HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE_CURRENT, mask_sh),\ + HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR, mask_sh),\ + HUBBUB_SF(COMPBUF_RESERVED_SPACE, COMPBUF_RESERVED_SPACE_64B, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_CNTL, DCHUBBUB_ARB_ALLOW_USR_RETRAINING_FORCE_VALUE, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_CNTL, DCHUBBUB_ARB_ALLOW_USR_RETRAINING_FORCE_ENABLE, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_CNTL, DCHUBBUB_ARB_DO_NOT_FORCE_ALLOW_USR_RETRAINING_DURING_PSTATE_CHANGE_REQUEST, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_CNTL, DCHUBBUB_ARB_DO_NOT_FORCE_ALLOW_USR_RETRAINING_DURING_PRE_CSTATE, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, mask_sh),\ + HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, 
DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, mask_sh), \ + HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_ADDR_MSB, DCN_VM_FAULT_ADDR_MSB, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_ADDR_LSB, DCN_VM_FAULT_ADDR_LSB, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_CLEAR, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_MODE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_INTERRUPT_ENABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_RANGE_FAULT_DISABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_PRQ_FAULT_DISABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_STATUS, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_VMID, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_TABLE_LEVEL, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_PIPE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh),\ + HUBBUB_SF(SDPIF_REQUEST_RATE_LIMIT, SDPIF_REQUEST_RATE_LIMIT, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CLOCK_CNTL, DISPCLK_R_DCHUBBUB_GATE_DIS, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CLOCK_CNTL, DCFCLK_R_DCHUBBUB_GATE_DIS, mask_sh),\ + HUBBUB_SF(DCHUBBUB_SDPIF_CFG0, SDPIF_PORT_CONTROL, mask_sh),\ + HUBBUB_SF(DCHUBBUB_SDPIF_CFG1, SDPIF_MAX_NUM_OUTSTANDING, mask_sh),\ + HUBBUB_SF(DCHUBBUB_MEM_PWR_MODE_CTRL, DET_MEM_PWR_LS_MODE, mask_sh) + + +bool hubbub401_program_urgent_watermarks( + struct hubbub *hubbub, + union dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower); + +bool hubbub401_program_stutter_watermarks( + struct hubbub *hubbub, + union dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower); + +bool hubbub401_program_pstate_watermarks( + struct hubbub *hubbub, + union dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower); + +bool hubbub401_program_usr_watermarks( + struct hubbub *hubbub, + union dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower); + +bool hubbub401_dcc_support_swizzle( + enum swizzle_mode_addr3_values swizzle, + unsigned int plane_pitch, + unsigned int bytes_per_element, + enum segment_order *segment_order_horz, + enum segment_order *segment_order_vert); + +bool hubbub401_dcc_support_pixel_format( + enum surface_pixel_format format, + unsigned int *plane0_bpe, + unsigned int *plane1_bpe); + +void hubbub401_get_blk256_size( + unsigned int *blk256_width, + unsigned int *blk256_height, + unsigned int bytes_per_element); + +void hubbub401_det_request_size( + unsigned int detile_buf_size, + enum surface_pixel_format format, + unsigned int p0_height, + unsigned int p0_width, + unsigned int p0_bpe, + unsigned int p1_height, + unsigned int p1_width, + unsigned int p1_bpe, + bool *p0_req128_horz_wc, + bool *p0_req128_vert_wc, + bool *p1_req128_horz_wc, + bool *p1_req128_vert_wc); +void hubbub401_construct(struct dcn20_hubbub *hubbub2, + struct dc_context *ctx, + const struct dcn_hubbub_registers *hubbub_regs, + const struct dcn_hubbub_shift *hubbub_shift, + const struct dcn_hubbub_mask *hubbub_mask, + int det_size_kb, + int pixel_chunk_size_kb, + int config_return_buffer_size_kb); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubp.c new file mode 100644 index 000000000000..6692d57d5cce --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubp.c @@ -0,0 +1,1027 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dm_services.h" +#include "dce_calcs.h" +#include "reg_helper.h" +#include "basics/conversion.h" +#include "dcn401_hubp.h" +#include "dal_asic_id.h" + +#define REG(reg)\ + hubp2->hubp_regs->reg + +#define CTX \ + hubp2->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + hubp2->hubp_shift->field_name, hubp2->hubp_mask->field_name + +static void hubp401_program_3dlut_fl_addr(struct hubp *hubp, + const struct dc_plane_address address) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(HUBP_3DLUT_ADDRESS_HIGH, HUBP_3DLUT_ADDRESS_HIGH, address.lut3d.addr.high_part); + REG_WRITE(HUBP_3DLUT_ADDRESS_LOW, address.lut3d.addr.low_part); +} + +static void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int refcyc_per_3dlut_group) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(HUBP_3DLUT_DLG_PARAM, REFCYC_PER_3DLUT_GROUP, refcyc_per_3dlut_group); +} + +static void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_ENABLE, enable ? 1 : 0); +} + +int hubp401_get_3dlut_fl_done(struct hubp *hubp) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + int ret; + + REG_GET(HUBP_3DLUT_CONTROL, HUBP_3DLUT_DONE, &ret); + return ret; +} + +static void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_ADDRESSING_MODE, addr_mode); +} + +static void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_WIDTH, width); +} + +static void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_TMZ, protection_enabled ? 
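Editor's note, not part of the patch: a hypothetical sketch of how a caller might drive the 3DLUT fast-load helpers registered in dcn401_hubp_funcs later in this file; the ordering and the placeholder values (fmt, mode, addr_mode, width, addr) are assumptions for illustration only:

	hubp->funcs->hubp_program_3dlut_fl_format(hubp, fmt);
	hubp->funcs->hubp_program_3dlut_fl_mode(hubp, mode);
	hubp->funcs->hubp_program_3dlut_fl_addressing_mode(hubp, addr_mode);
	hubp->funcs->hubp_program_3dlut_fl_width(hubp, width);
	hubp->funcs->hubp_program_3dlut_fl_addr(hubp, addr);
	hubp->funcs->hubp_enable_3dlut_fl(hubp, true);
	while (!hubp->funcs->hubp_get_3dlut_fl_done(hubp))
		; /* poll completion; a real caller would not busy-wait like this */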
1 : 0); +} + +static void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, + enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g, + enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b, + enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE_3(HUBP_3DLUT_CONTROL, + HUBP_3DLUT_CROSSBAR_SELECT_Y_G, bit_slice_y_g, + HUBP_3DLUT_CROSSBAR_SELECT_CB_B, bit_slice_cb_b, + HUBP_3DLUT_CROSSBAR_SELECT_CR_R, bit_slice_cr_r); +} + +static void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE_2(_3DLUT_FL_BIAS_SCALE, HUBP0_3DLUT_FL_BIAS, bias, HUBP0_3DLUT_FL_SCALE, scale); +} + +static void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_MODE, mode); +} + +static void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_FORMAT, format); +} + +void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + // Also cache cursor in MALL if using MALL for SS + REG_UPDATE_2(DCHUBP_MALL_CONFIG, USE_MALL_SEL, mall_sel, + USE_MALL_FOR_CURSOR, c_cursor); + + REG_UPDATE_2(DCHUBP_MALL_CONFIG, MALL_PREF_CMD_TYPE, 1, MALL_PREF_MODE, 0); +} + + +void hubp401_init(struct hubp *hubp) +{ + //For now nothing to do, HUBPREQ_DEBUG_DB register is removed on DCN4x. +} + +void hubp401_vready_at_or_After_vsync(struct hubp *hubp, + struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest) +{ + uint32_t value = 0; + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + /* + * if (VSTARTUP_START - (VREADY_OFFSET+VUPDATE_WIDTH+VUPDATE_OFFSET)/htotal) <= OTG_V_BLANK_END + * Set HUBP_VREADY_AT_OR_AFTER_VSYNC = 1 + * else + * Set HUBP_VREADY_AT_OR_AFTER_VSYNC = 0 + */ + if (pipe_dest->htotal != 0) { + if ((pipe_dest->vstartup_start - (pipe_dest->vready_offset+pipe_dest->vupdate_width + + pipe_dest->vupdate_offset) / pipe_dest->htotal) <= pipe_dest->vblank_end) { + value = 1; + } else + value = 0; + } + + REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, value); +} + +void hubp401_program_requestor( + struct hubp *hubp, + struct _vcs_dpi_display_rq_regs_st *rq_regs) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(HUBPRET_CONTROL, + DET_BUF_PLANE1_BASE_ADDRESS, rq_regs->plane1_base_address); + REG_SET_4(DCN_EXPANSION_MODE, 0, + DRQ_EXPANSION_MODE, rq_regs->drq_expansion_mode, + PRQ_EXPANSION_MODE, rq_regs->prq_expansion_mode, + MRQ_EXPANSION_MODE, rq_regs->mrq_expansion_mode, + CRQ_EXPANSION_MODE, rq_regs->crq_expansion_mode); + REG_SET_6(DCHUBP_REQ_SIZE_CONFIG, 0, + CHUNK_SIZE, rq_regs->rq_regs_l.chunk_size, + MIN_CHUNK_SIZE, rq_regs->rq_regs_l.min_chunk_size, + DPTE_GROUP_SIZE, rq_regs->rq_regs_l.dpte_group_size, + VM_GROUP_SIZE, rq_regs->rq_regs_l.mpte_group_size, + SWATH_HEIGHT, rq_regs->rq_regs_l.swath_height, + PTE_ROW_HEIGHT_LINEAR, rq_regs->rq_regs_l.pte_row_height_linear); + REG_SET_5(DCHUBP_REQ_SIZE_CONFIG_C, 0, + CHUNK_SIZE_C, rq_regs->rq_regs_c.chunk_size, + MIN_CHUNK_SIZE_C, rq_regs->rq_regs_c.min_chunk_size, + DPTE_GROUP_SIZE_C, rq_regs->rq_regs_c.dpte_group_size, + SWATH_HEIGHT_C, rq_regs->rq_regs_c.swath_height, + PTE_ROW_HEIGHT_LINEAR_C, rq_regs->rq_regs_c.pte_row_height_linear); +} + +void 
hubp401_program_deadline( + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_attr, + struct _vcs_dpi_display_ttu_regs_st *ttu_attr) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + /* put DLG in mission mode */ + REG_WRITE(HUBPREQ_DEBUG_DB, 1 << 8); + + /* DLG - Per hubp */ + REG_SET_2(BLANK_OFFSET_0, 0, + REFCYC_H_BLANK_END, dlg_attr->refcyc_h_blank_end, + DLG_V_BLANK_END, dlg_attr->dlg_vblank_end); + + REG_SET(BLANK_OFFSET_1, 0, + MIN_DST_Y_NEXT_START, dlg_attr->min_dst_y_next_start); + + REG_SET(DST_DIMENSIONS, 0, + REFCYC_PER_HTOTAL, dlg_attr->refcyc_per_htotal); + + REG_SET_2(DST_AFTER_SCALER, 0, + REFCYC_X_AFTER_SCALER, dlg_attr->refcyc_x_after_scaler, + DST_Y_AFTER_SCALER, dlg_attr->dst_y_after_scaler); + + REG_SET(REF_FREQ_TO_PIX_FREQ, 0, + REF_FREQ_TO_PIX_FREQ, dlg_attr->ref_freq_to_pix_freq); + + /* DLG - Per luma/chroma */ + REG_SET(VBLANK_PARAMETERS_1, 0, + REFCYC_PER_PTE_GROUP_VBLANK_L, dlg_attr->refcyc_per_pte_group_vblank_l); + + if (REG(NOM_PARAMETERS_0)) + REG_SET(NOM_PARAMETERS_0, 0, + DST_Y_PER_PTE_ROW_NOM_L, dlg_attr->dst_y_per_pte_row_nom_l); + + if (REG(NOM_PARAMETERS_1)) + REG_SET(NOM_PARAMETERS_1, 0, + REFCYC_PER_PTE_GROUP_NOM_L, dlg_attr->refcyc_per_pte_group_nom_l); + + REG_SET(NOM_PARAMETERS_4, 0, + DST_Y_PER_META_ROW_NOM_L, dlg_attr->dst_y_per_meta_row_nom_l); + + REG_SET(NOM_PARAMETERS_5, 0, + REFCYC_PER_META_CHUNK_NOM_L, dlg_attr->refcyc_per_meta_chunk_nom_l); + + REG_SET_2(PER_LINE_DELIVERY, 0, + REFCYC_PER_LINE_DELIVERY_L, dlg_attr->refcyc_per_line_delivery_l, + REFCYC_PER_LINE_DELIVERY_C, dlg_attr->refcyc_per_line_delivery_c); + + REG_SET(VBLANK_PARAMETERS_2, 0, + REFCYC_PER_PTE_GROUP_VBLANK_C, dlg_attr->refcyc_per_pte_group_vblank_c); + + if (REG(NOM_PARAMETERS_2)) + REG_SET(NOM_PARAMETERS_2, 0, + DST_Y_PER_PTE_ROW_NOM_C, dlg_attr->dst_y_per_pte_row_nom_c); + + if (REG(NOM_PARAMETERS_3)) + REG_SET(NOM_PARAMETERS_3, 0, + REFCYC_PER_PTE_GROUP_NOM_C, dlg_attr->refcyc_per_pte_group_nom_c); + + REG_SET(NOM_PARAMETERS_6, 0, + DST_Y_PER_META_ROW_NOM_C, dlg_attr->dst_y_per_meta_row_nom_c); + + REG_SET(NOM_PARAMETERS_7, 0, + REFCYC_PER_META_CHUNK_NOM_C, dlg_attr->refcyc_per_meta_chunk_nom_c); + + /* TTU - per hubp */ + REG_SET_2(DCN_TTU_QOS_WM, 0, + QoS_LEVEL_LOW_WM, ttu_attr->qos_level_low_wm, + QoS_LEVEL_HIGH_WM, ttu_attr->qos_level_high_wm); + + /* TTU - per luma/chroma */ + /* Assumed surf0 is luma and 1 is chroma */ + + REG_SET_3(DCN_SURF0_TTU_CNTL0, 0, + REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_l, + QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_l, + QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_l); + + REG_SET_3(DCN_SURF1_TTU_CNTL0, 0, + REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_c, + QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_c, + QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_c); + + REG_SET_3(DCN_CUR0_TTU_CNTL0, 0, + REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_cur0, + QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_cur0, + QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_cur0); + + REG_SET(FLIP_PARAMETERS_1, 0, + REFCYC_PER_PTE_GROUP_FLIP_L, dlg_attr->refcyc_per_pte_group_flip_l); + REG_SET(HUBP_3DLUT_DLG_PARAM, 0, REFCYC_PER_3DLUT_GROUP, dlg_attr->refcyc_per_tdlut_group); + + REG_UPDATE(DCN_DMDATA_VM_CNTL, + REFCYC_PER_VM_DMDATA, dlg_attr->refcyc_per_vm_dmdata); +} + +void hubp401_setup( + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_attr, + struct _vcs_dpi_display_ttu_regs_st *ttu_attr, + struct _vcs_dpi_display_rq_regs_st *rq_regs, + struct _vcs_dpi_display_pipe_dest_params_st 
*pipe_dest) +{ + /* otg is locked when this func is called. Register are double buffered. + * disable the requestors is not needed + */ + hubp401_vready_at_or_After_vsync(hubp, pipe_dest); + hubp401_program_requestor(hubp, rq_regs); + hubp401_program_deadline(hubp, dlg_attr, ttu_attr); +} + +void hubp401_setup_interdependent( + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_attr, + struct _vcs_dpi_display_ttu_regs_st *ttu_attr) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_SET_2(PREFETCH_SETTINGS, 0, + DST_Y_PREFETCH, dlg_attr->dst_y_prefetch, + VRATIO_PREFETCH, dlg_attr->vratio_prefetch); + + REG_SET(PREFETCH_SETTINGS_C, 0, + VRATIO_PREFETCH_C, dlg_attr->vratio_prefetch_c); + + REG_SET_2(VBLANK_PARAMETERS_0, 0, + DST_Y_PER_VM_VBLANK, dlg_attr->dst_y_per_vm_vblank, + DST_Y_PER_ROW_VBLANK, dlg_attr->dst_y_per_row_vblank); + + REG_SET_2(FLIP_PARAMETERS_0, 0, + DST_Y_PER_VM_FLIP, dlg_attr->dst_y_per_vm_flip, + DST_Y_PER_ROW_FLIP, dlg_attr->dst_y_per_row_flip); + + REG_SET(VBLANK_PARAMETERS_3, 0, + REFCYC_PER_META_CHUNK_VBLANK_L, dlg_attr->refcyc_per_meta_chunk_vblank_l); + + REG_SET(VBLANK_PARAMETERS_4, 0, + REFCYC_PER_META_CHUNK_VBLANK_C, dlg_attr->refcyc_per_meta_chunk_vblank_c); + + REG_SET(FLIP_PARAMETERS_2, 0, + REFCYC_PER_META_CHUNK_FLIP_L, dlg_attr->refcyc_per_meta_chunk_flip_l); + + REG_SET_2(PER_LINE_DELIVERY_PRE, 0, + REFCYC_PER_LINE_DELIVERY_PRE_L, dlg_attr->refcyc_per_line_delivery_pre_l, + REFCYC_PER_LINE_DELIVERY_PRE_C, dlg_attr->refcyc_per_line_delivery_pre_c); + + REG_SET(DCN_SURF0_TTU_CNTL1, 0, + REFCYC_PER_REQ_DELIVERY_PRE, + ttu_attr->refcyc_per_req_delivery_pre_l); + REG_SET(DCN_SURF1_TTU_CNTL1, 0, + REFCYC_PER_REQ_DELIVERY_PRE, + ttu_attr->refcyc_per_req_delivery_pre_c); + REG_SET(DCN_CUR0_TTU_CNTL1, 0, + REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_cur0); + + REG_SET_2(DCN_GLOBAL_TTU_CNTL, 0, + MIN_TTU_VBLANK, ttu_attr->min_ttu_vblank, + QoS_LEVEL_FLIP, ttu_attr->qos_level_flip); +} + + +bool hubp401_program_surface_flip_and_addr( + struct hubp *hubp, + const struct dc_plane_address *address, + bool flip_immediate) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + //program flip type + REG_UPDATE(DCSURF_FLIP_CONTROL, + SURFACE_FLIP_TYPE, flip_immediate); + + // Program VMID reg + if (flip_immediate == 0) + REG_UPDATE(VMID_SETTINGS_0, + VMID, address->vmid); + + if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) { + REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0); + REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1); + + } else { + // turn off stereo if not in stereo + REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x0); + REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x0); + } + + /* HW automatically latch rest of address register on write to + * DCSURF_PRIMARY_SURFACE_ADDRESS if SURFACE_UPDATE_LOCK is not used + * + * program high first and then the low addr, order matters! 
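Editor's note: put differently, the write to the low DCSURF_PRIMARY_SURFACE_ADDRESS register is what latches the whole address set and arms the flip, which is why every case in the switch below stages the HIGH and chroma/alpha registers first and writes the primary low address last.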
+ */ + switch (address->type) { + case PLN_ADDR_TYPE_GRAPHICS: + if (address->grph.addr.quad_part == 0) + break; + + REG_UPDATE(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_TMZ, address->tmz_surface); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, + PRIMARY_SURFACE_ADDRESS_HIGH, + address->grph.addr.high_part); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, + PRIMARY_SURFACE_ADDRESS, + address->grph.addr.low_part); + break; + case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE: + if (address->video_progressive.luma_addr.quad_part == 0 + || address->video_progressive.chroma_addr.quad_part == 0) + break; + + REG_UPDATE_2(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_TMZ, address->tmz_surface, + PRIMARY_SURFACE_TMZ_C, address->tmz_surface); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, + PRIMARY_SURFACE_ADDRESS_HIGH_C, + address->video_progressive.chroma_addr.high_part); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, + PRIMARY_SURFACE_ADDRESS_C, + address->video_progressive.chroma_addr.low_part); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, + PRIMARY_SURFACE_ADDRESS_HIGH, + address->video_progressive.luma_addr.high_part); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, + PRIMARY_SURFACE_ADDRESS, + address->video_progressive.luma_addr.low_part); + break; + case PLN_ADDR_TYPE_GRPH_STEREO: + if (address->grph_stereo.left_addr.quad_part == 0) + break; + if (address->grph_stereo.right_addr.quad_part == 0) + break; + + REG_UPDATE_4(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_TMZ, address->tmz_surface, + PRIMARY_SURFACE_TMZ_C, address->tmz_surface, + SECONDARY_SURFACE_TMZ, address->tmz_surface, + SECONDARY_SURFACE_TMZ_C, address->tmz_surface); + + REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, 0, + SECONDARY_SURFACE_ADDRESS_HIGH_C, + address->grph_stereo.right_alpha_addr.high_part); + + REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_C, 0, + SECONDARY_SURFACE_ADDRESS_C, + address->grph_stereo.right_alpha_addr.low_part); + + REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0, + SECONDARY_SURFACE_ADDRESS_HIGH, + address->grph_stereo.right_addr.high_part); + + REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0, + SECONDARY_SURFACE_ADDRESS, + address->grph_stereo.right_addr.low_part); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, + PRIMARY_SURFACE_ADDRESS_HIGH_C, + address->grph_stereo.left_alpha_addr.high_part); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, + PRIMARY_SURFACE_ADDRESS_C, + address->grph_stereo.left_alpha_addr.low_part); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, + PRIMARY_SURFACE_ADDRESS_HIGH, + address->grph_stereo.left_addr.high_part); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, + PRIMARY_SURFACE_ADDRESS, + address->grph_stereo.left_addr.low_part); + break; + case PLN_ADDR_TYPE_RGBEA: + if (address->rgbea.addr.quad_part == 0 + || address->rgbea.alpha_addr.quad_part == 0) + break; + + REG_UPDATE_2(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_TMZ, address->tmz_surface, + PRIMARY_SURFACE_TMZ_C, address->tmz_surface); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, + PRIMARY_SURFACE_ADDRESS_HIGH_C, + address->rgbea.alpha_addr.high_part); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0, + PRIMARY_SURFACE_ADDRESS_C, + address->rgbea.alpha_addr.low_part); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, + PRIMARY_SURFACE_ADDRESS_HIGH, + address->rgbea.addr.high_part); + + REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0, + PRIMARY_SURFACE_ADDRESS, + address->rgbea.addr.low_part); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } + + hubp->request_address = *address; + + return 
true; +} + +void hubp401_dcc_control(struct hubp *hubp, + struct dc_plane_dcc_param *dcc) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE_2(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_DCC_EN, dcc->enable, + SECONDARY_SURFACE_DCC_EN, dcc->enable); +} + +void hubp401_program_tiling( + struct dcn20_hubp *hubp2, + const union dc_tiling_info *info, + const enum surface_pixel_format pixel_format) +{ + /* DCSURF_ADDR_CONFIG still shows up in reg spec, but does not need to be programmed for DCN4x + * All 4 fields NUM_PIPES, PIPE_INTERLEAVE, MAX_COMPRESSED_FRAGS and NUM_PKRS are irrelevant. + * + * DIM_TYPE field in DCSURF_TILING for Display is always 1 (2D dimension) which is HW default. + */ + REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, info->gfx_addr3.swizzle); +} + +void hubp401_program_size( + struct hubp *hubp, + enum surface_pixel_format format, + const struct plane_size *plane_size, + struct dc_plane_dcc_param *dcc) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + uint32_t pitch, pitch_c; + bool use_pitch_c = false; + + /* Program data pitch (calculation from addrlib) + * 444 or 420 luma + */ + use_pitch_c = format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN + && format < SURFACE_PIXEL_FORMAT_SUBSAMPLE_END; + use_pitch_c = use_pitch_c + || (format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA); + if (use_pitch_c) { + ASSERT(plane_size->chroma_pitch != 0); + /* Chroma pitch zero can cause system hang! */ + + pitch = plane_size->surface_pitch - 1; + pitch_c = plane_size->chroma_pitch - 1; + } else { + pitch = plane_size->surface_pitch - 1; + pitch_c = 0; + } + + REG_UPDATE(DCSURF_SURFACE_PITCH, PITCH, pitch); + + if (use_pitch_c) + REG_UPDATE(DCSURF_SURFACE_PITCH_C, PITCH_C, pitch_c); +} + +void hubp401_program_surface_config( + struct hubp *hubp, + enum surface_pixel_format format, + union dc_tiling_info *tiling_info, + struct plane_size *plane_size, + enum dc_rotation_angle rotation, + struct dc_plane_dcc_param *dcc, + bool horizontal_mirror, + unsigned int compat_level) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + hubp401_dcc_control(hubp, dcc); + hubp401_program_tiling(hubp2, tiling_info, format); + hubp401_program_size(hubp, format, plane_size, dcc); + hubp2_program_rotation(hubp, rotation, horizontal_mirror); + hubp2_program_pixel_format(hubp, format); +} + +void hubp401_set_viewport( + struct hubp *hubp, + const struct rect *viewport, + const struct rect *viewport_c) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_SET_2(DCSURF_PRI_VIEWPORT_DIMENSION, 0, + PRI_VIEWPORT_WIDTH, viewport->width, + PRI_VIEWPORT_HEIGHT, viewport->height); + + REG_SET_2(DCSURF_PRI_VIEWPORT_START, 0, + PRI_VIEWPORT_X_START, viewport->x, + PRI_VIEWPORT_Y_START, viewport->y); + + /*for stereo*/ + REG_SET_2(DCSURF_SEC_VIEWPORT_DIMENSION, 0, + SEC_VIEWPORT_WIDTH, viewport->width, + SEC_VIEWPORT_HEIGHT, viewport->height); + + REG_SET_2(DCSURF_SEC_VIEWPORT_START, 0, + SEC_VIEWPORT_X_START, viewport->x, + SEC_VIEWPORT_Y_START, viewport->y); + + /* DC supports NV12 only at the moment */ + REG_SET_2(DCSURF_PRI_VIEWPORT_DIMENSION_C, 0, + PRI_VIEWPORT_WIDTH_C, viewport_c->width, + PRI_VIEWPORT_HEIGHT_C, viewport_c->height); + + REG_SET_2(DCSURF_PRI_VIEWPORT_START_C, 0, + PRI_VIEWPORT_X_START_C, viewport_c->x, + PRI_VIEWPORT_Y_START_C, viewport_c->y); + + REG_SET_2(DCSURF_SEC_VIEWPORT_DIMENSION_C, 0, + SEC_VIEWPORT_WIDTH_C, viewport_c->width, + SEC_VIEWPORT_HEIGHT_C, viewport_c->height); + + REG_SET_2(DCSURF_SEC_VIEWPORT_START_C, 0, + SEC_VIEWPORT_X_START_C, viewport_c->x, + 
SEC_VIEWPORT_Y_START_C, viewport_c->y); +} + +void hubp401_set_flip_int(struct hubp *hubp) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(DCSURF_SURFACE_FLIP_INTERRUPT, + SURFACE_FLIP_INT_MASK, 1); + + return; +} + +bool hubp401_in_blank(struct hubp *hubp) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + uint32_t in_blank; + + REG_GET(DCHUBP_CNTL, HUBP_IN_BLANK, &in_blank); + return in_blank ? true : false; +} + + +void hubp401_cursor_set_position( + struct hubp *hubp, + const struct dc_cursor_position *pos, + const struct dc_cursor_mi_param *param) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + int x_pos = pos->x - param->recout.x; + int y_pos = pos->y - param->recout.y; + int x_hotspot = pos->x_hotspot; + int y_hotspot = pos->y_hotspot; + int rec_x_offset = x_pos - pos->x_hotspot; + int rec_y_offset = y_pos - pos->y_hotspot; + int cursor_height = (int)hubp->curs_attr.height; + int cursor_width = (int)hubp->curs_attr.width; + uint32_t dst_x_offset; + uint32_t cur_en = pos->enable ? 1 : 0; + + hubp->curs_pos = *pos; + + /* + * Guard aganst cursor_set_position() from being called with invalid + * attributes + */ + if (hubp->curs_attr.address.quad_part == 0) + return; + + // Transform cursor width / height and hotspots for offset calculations + if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { + swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + + if (param->rotation == ROTATION_ANGLE_90) { + // hotspot = (-y, x) + rec_x_offset = x_pos - (cursor_width - x_hotspot); + rec_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + rec_x_offset = x_pos - x_hotspot; + rec_y_offset = y_pos - (cursor_height - y_hotspot); + } + } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) + if (!param->mirror) + rec_x_offset = x_pos - (cursor_width - x_hotspot); + + rec_y_offset = y_pos - (cursor_height - y_hotspot); + } + + dst_x_offset = (rec_x_offset >= 0) ? 
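Editor's note, a worked example with made-up cursor numbers: for a 64x64 cursor whose hotspot is (10, 20), the ROTATION_ANGLE_90 branch above first swaps width/height and the two hotspot values, so x_hotspot becomes 20, and then computes rec_x_offset = x_pos - (cursor_width - x_hotspot) = x_pos - 44 and rec_y_offset = y_pos - y_hotspot = y_pos - 10, which is the "hotspot = (-y, x)" transform described in the comment above.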
rec_x_offset : 0; + dst_x_offset *= param->ref_clk_khz; + dst_x_offset /= param->pixel_clk_khz; + + ASSERT(param->h_scale_ratio.value); + + if (param->h_scale_ratio.value) + dst_x_offset = dc_fixpt_floor(dc_fixpt_div( + dc_fixpt_from_int(dst_x_offset), + param->h_scale_ratio)); + + if (rec_x_offset >= (int)param->recout.width) + cur_en = 0; /* not visible beyond right edge*/ + + if (rec_x_offset + cursor_width <= 0) + cur_en = 0; /* not visible beyond left edge*/ + + if (rec_y_offset >= (int)param->recout.height) + cur_en = 0; /* not visible beyond bottom edge*/ + + if (rec_y_offset + cursor_height <= 0) + cur_en = 0; /* not visible beyond top edge*/ + + if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) + hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); + + REG_UPDATE(CURSOR_CONTROL, + CURSOR_ENABLE, cur_en); + + REG_SET_2(CURSOR_POSITION, 0, + CURSOR_X_POSITION, pos->x, + CURSOR_Y_POSITION, pos->y); + + REG_SET_2(CURSOR_HOT_SPOT, 0, + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); + + REG_SET(CURSOR_DST_OFFSET, 0, + CURSOR_DST_X_OFFSET, dst_x_offset); + + /* Cursor Position Register Config */ + hubp->pos.cur_ctl.bits.cur_enable = cur_en; + hubp->pos.position.bits.x_pos = pos->x; + hubp->pos.position.bits.y_pos = pos->y; + hubp->pos.hot_spot.bits.x_hot = pos->x_hotspot; + hubp->pos.hot_spot.bits.y_hot = pos->y_hotspot; + hubp->pos.dst_offset.bits.dst_x_offset = dst_x_offset; + /* Cursor Rectangle Cache + * Cursor bitmaps have different hotspot values + * There's a possibility that the above logic returns a negative value, + * so we clamp them to 0 + */ + if (rec_x_offset < 0) + rec_x_offset = 0; + if (rec_y_offset < 0) + rec_y_offset = 0; + /* Save necessary cursor info x, y position. w, h is saved in attribute func. 
*/ + hubp->cur_rect.x = rec_x_offset + param->recout.x; + hubp->cur_rect.y = rec_y_offset + param->recout.y; +} + +void hubp401_read_state(struct hubp *hubp) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + struct dcn_hubp_state *s = &hubp2->state; + struct _vcs_dpi_display_dlg_regs_st *dlg_attr = &s->dlg_attr; + struct _vcs_dpi_display_ttu_regs_st *ttu_attr = &s->ttu_attr; + struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs; + + /* Requester */ + REG_GET(HUBPRET_CONTROL, + DET_BUF_PLANE1_BASE_ADDRESS, &rq_regs->plane1_base_address); + REG_GET_4(DCN_EXPANSION_MODE, + DRQ_EXPANSION_MODE, &rq_regs->drq_expansion_mode, + PRQ_EXPANSION_MODE, &rq_regs->prq_expansion_mode, + MRQ_EXPANSION_MODE, &rq_regs->mrq_expansion_mode, + CRQ_EXPANSION_MODE, &rq_regs->crq_expansion_mode); + + REG_GET_5(DCHUBP_REQ_SIZE_CONFIG, + CHUNK_SIZE, &rq_regs->rq_regs_l.chunk_size, + MIN_CHUNK_SIZE, &rq_regs->rq_regs_l.min_chunk_size, + DPTE_GROUP_SIZE, &rq_regs->rq_regs_l.dpte_group_size, + SWATH_HEIGHT, &rq_regs->rq_regs_l.swath_height, + PTE_ROW_HEIGHT_LINEAR, &rq_regs->rq_regs_l.pte_row_height_linear); + + REG_GET_5(DCHUBP_REQ_SIZE_CONFIG_C, + CHUNK_SIZE_C, &rq_regs->rq_regs_c.chunk_size, + MIN_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_chunk_size, + DPTE_GROUP_SIZE_C, &rq_regs->rq_regs_c.dpte_group_size, + SWATH_HEIGHT_C, &rq_regs->rq_regs_c.swath_height, + PTE_ROW_HEIGHT_LINEAR_C, &rq_regs->rq_regs_c.pte_row_height_linear); + + REG_GET(DCN_VM_SYSTEM_APERTURE_HIGH_ADDR, + MC_VM_SYSTEM_APERTURE_HIGH_ADDR, &rq_regs->aperture_high_addr); + + REG_GET(DCN_VM_SYSTEM_APERTURE_LOW_ADDR, + MC_VM_SYSTEM_APERTURE_LOW_ADDR, &rq_regs->aperture_low_addr); + + /* DLG - Per hubp */ + REG_GET_2(BLANK_OFFSET_0, + REFCYC_H_BLANK_END, &dlg_attr->refcyc_h_blank_end, + DLG_V_BLANK_END, &dlg_attr->dlg_vblank_end); + + REG_GET(BLANK_OFFSET_1, + MIN_DST_Y_NEXT_START, &dlg_attr->min_dst_y_next_start); + + REG_GET(DST_DIMENSIONS, + REFCYC_PER_HTOTAL, &dlg_attr->refcyc_per_htotal); + + REG_GET_2(DST_AFTER_SCALER, + REFCYC_X_AFTER_SCALER, &dlg_attr->refcyc_x_after_scaler, + DST_Y_AFTER_SCALER, &dlg_attr->dst_y_after_scaler); + + REG_GET_2(PREFETCH_SETTINGS, + DST_Y_PREFETCH, &dlg_attr->dst_y_prefetch, + VRATIO_PREFETCH, &dlg_attr->vratio_prefetch); + + REG_GET_2(VBLANK_PARAMETERS_0, + DST_Y_PER_VM_VBLANK, &dlg_attr->dst_y_per_vm_vblank, + DST_Y_PER_ROW_VBLANK, &dlg_attr->dst_y_per_row_vblank); + + REG_GET(REF_FREQ_TO_PIX_FREQ, + REF_FREQ_TO_PIX_FREQ, &dlg_attr->ref_freq_to_pix_freq); + + /* DLG - Per luma/chroma */ + REG_GET(VBLANK_PARAMETERS_1, + REFCYC_PER_PTE_GROUP_VBLANK_L, &dlg_attr->refcyc_per_pte_group_vblank_l); + + REG_GET(VBLANK_PARAMETERS_3, + REFCYC_PER_META_CHUNK_VBLANK_L, &dlg_attr->refcyc_per_meta_chunk_vblank_l); + + REG_GET(NOM_PARAMETERS_0, + DST_Y_PER_PTE_ROW_NOM_L, &dlg_attr->dst_y_per_pte_row_nom_l); + + REG_GET(NOM_PARAMETERS_1, + REFCYC_PER_PTE_GROUP_NOM_L, &dlg_attr->refcyc_per_pte_group_nom_l); + + REG_GET(NOM_PARAMETERS_4, + DST_Y_PER_META_ROW_NOM_L, &dlg_attr->dst_y_per_meta_row_nom_l); + + REG_GET(NOM_PARAMETERS_5, + REFCYC_PER_META_CHUNK_NOM_L, &dlg_attr->refcyc_per_meta_chunk_nom_l); + + REG_GET_2(PER_LINE_DELIVERY_PRE, + REFCYC_PER_LINE_DELIVERY_PRE_L, &dlg_attr->refcyc_per_line_delivery_pre_l, + REFCYC_PER_LINE_DELIVERY_PRE_C, &dlg_attr->refcyc_per_line_delivery_pre_c); + + REG_GET_2(PER_LINE_DELIVERY, + REFCYC_PER_LINE_DELIVERY_L, &dlg_attr->refcyc_per_line_delivery_l, + REFCYC_PER_LINE_DELIVERY_C, &dlg_attr->refcyc_per_line_delivery_c); + + REG_GET(PREFETCH_SETTINGS_C, + VRATIO_PREFETCH_C, 
&dlg_attr->vratio_prefetch_c); + + REG_GET(VBLANK_PARAMETERS_2, + REFCYC_PER_PTE_GROUP_VBLANK_C, &dlg_attr->refcyc_per_pte_group_vblank_c); + + REG_GET(VBLANK_PARAMETERS_4, + REFCYC_PER_META_CHUNK_VBLANK_C, &dlg_attr->refcyc_per_meta_chunk_vblank_c); + + REG_GET(NOM_PARAMETERS_2, + DST_Y_PER_PTE_ROW_NOM_C, &dlg_attr->dst_y_per_pte_row_nom_c); + + REG_GET(NOM_PARAMETERS_3, + REFCYC_PER_PTE_GROUP_NOM_C, &dlg_attr->refcyc_per_pte_group_nom_c); + + REG_GET(NOM_PARAMETERS_6, + DST_Y_PER_META_ROW_NOM_C, &dlg_attr->dst_y_per_meta_row_nom_c); + + REG_GET(NOM_PARAMETERS_7, + REFCYC_PER_META_CHUNK_NOM_C, &dlg_attr->refcyc_per_meta_chunk_nom_c); + + /* TTU - per hubp */ + REG_GET_2(DCN_TTU_QOS_WM, + QoS_LEVEL_LOW_WM, &ttu_attr->qos_level_low_wm, + QoS_LEVEL_HIGH_WM, &ttu_attr->qos_level_high_wm); + + REG_GET_2(DCN_GLOBAL_TTU_CNTL, + MIN_TTU_VBLANK, &ttu_attr->min_ttu_vblank, + QoS_LEVEL_FLIP, &ttu_attr->qos_level_flip); + + /* TTU - per luma/chroma */ + /* Assumed surf0 is luma and 1 is chroma */ + + REG_GET_3(DCN_SURF0_TTU_CNTL0, + REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_l, + QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_l, + QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_l); + + REG_GET(DCN_SURF0_TTU_CNTL1, + REFCYC_PER_REQ_DELIVERY_PRE, + &ttu_attr->refcyc_per_req_delivery_pre_l); + + REG_GET_3(DCN_SURF1_TTU_CNTL0, + REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_c, + QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_c, + QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_c); + + REG_GET(DCN_SURF1_TTU_CNTL1, + REFCYC_PER_REQ_DELIVERY_PRE, + &ttu_attr->refcyc_per_req_delivery_pre_c); + + /* Rest of hubp */ + REG_GET(DCSURF_SURFACE_CONFIG, + SURFACE_PIXEL_FORMAT, &s->pixel_format); + + REG_GET(DCSURF_SURFACE_EARLIEST_INUSE_HIGH, + SURFACE_EARLIEST_INUSE_ADDRESS_HIGH, &s->inuse_addr_hi); + + REG_GET(DCSURF_SURFACE_EARLIEST_INUSE, + SURFACE_EARLIEST_INUSE_ADDRESS, &s->inuse_addr_lo); + + REG_GET_2(DCSURF_PRI_VIEWPORT_DIMENSION, + PRI_VIEWPORT_WIDTH, &s->viewport_width, + PRI_VIEWPORT_HEIGHT, &s->viewport_height); + + REG_GET_2(DCSURF_SURFACE_CONFIG, + ROTATION_ANGLE, &s->rotation_angle, + H_MIRROR_EN, &s->h_mirror_en); + + REG_GET(DCSURF_TILING_CONFIG, + SW_MODE, &s->sw_mode); + + REG_GET(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_DCC_EN, &s->dcc_en); + + REG_GET_3(DCHUBP_CNTL, + HUBP_BLANK_EN, &s->blank_en, + HUBP_TTU_DISABLE, &s->ttu_disable, + HUBP_UNDERFLOW_STATUS, &s->underflow_status); + + REG_GET(HUBP_CLK_CNTL, + HUBP_CLOCK_ENABLE, &s->clock_en); + + REG_GET(DCN_GLOBAL_TTU_CNTL, + MIN_TTU_VBLANK, &s->min_ttu_vblank); + + REG_GET_2(DCN_TTU_QOS_WM, + QoS_LEVEL_LOW_WM, &s->qos_level_low_wm, + QoS_LEVEL_HIGH_WM, &s->qos_level_high_wm); + + REG_GET(DCSURF_PRIMARY_SURFACE_ADDRESS, + PRIMARY_SURFACE_ADDRESS, &s->primary_surface_addr_lo); + + REG_GET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, + PRIMARY_SURFACE_ADDRESS, &s->primary_surface_addr_hi); + + s->uclk_pstate_force = REG_READ(UCLK_PSTATE_FORCE); + + s->hubp_cntl = REG_READ(DCHUBP_CNTL); + s->flip_control = REG_READ(DCSURF_FLIP_CONTROL); +} + +void hubp401_set_unbounded_requesting(struct hubp *hubp, bool enable) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(DCHUBP_CNTL, HUBP_UNBOUNDED_REQ_MODE, enable); + + /* To ensure that cursor fetching starts as early as possible in + * the display prefetch, set CURSOR_REQ_MODE = 1 always. + * The setting of CURSOR_REQ_MODE = 0 is no longer supported in + * DCN4x as a fall back to legacy behavior of fetching cursor + * just before it appears on the screen. 
+ */ + REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, 1); +} + +static struct hubp_funcs dcn401_hubp_funcs = { + .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, + .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, + .hubp_program_surface_flip_and_addr = hubp401_program_surface_flip_and_addr, + .hubp_program_surface_config = hubp401_program_surface_config, + .hubp_is_flip_pending = hubp2_is_flip_pending, + .hubp_setup = hubp401_setup, + .hubp_setup_interdependent = hubp401_setup_interdependent, + .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings, + .set_blank = hubp2_set_blank, + .set_blank_regs = hubp2_set_blank_regs, + .mem_program_viewport = hubp401_set_viewport, + .set_cursor_attributes = hubp32_cursor_set_attributes, + .set_cursor_position = hubp401_cursor_set_position, + .hubp_clk_cntl = hubp2_clk_cntl, + .hubp_vtg_sel = hubp2_vtg_sel, + .dmdata_set_attributes = hubp3_dmdata_set_attributes, + .dmdata_load = hubp2_dmdata_load, + .dmdata_status_done = hubp2_dmdata_status_done, + .hubp_read_state = hubp401_read_state, + .hubp_clear_underflow = hubp2_clear_underflow, + .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl, + .hubp_init = hubp401_init, + .set_unbounded_requesting = hubp401_set_unbounded_requesting, + .hubp_soft_reset = hubp31_soft_reset, + .hubp_set_flip_int = hubp401_set_flip_int, + .hubp_in_blank = hubp401_in_blank, + .hubp_update_force_pstate_disallow = hubp32_update_force_pstate_disallow, + .phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable, + .hubp_update_mall_sel = hubp401_update_mall_sel, + .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering, + .hubp_update_3dlut_fl_bias_scale = hubp401_update_3dlut_fl_bias_scale, + .hubp_program_3dlut_fl_mode = hubp401_program_3dlut_fl_mode, + .hubp_program_3dlut_fl_format = hubp401_program_3dlut_fl_format, + .hubp_program_3dlut_fl_addr = hubp401_program_3dlut_fl_addr, + .hubp_program_3dlut_fl_dlg_param = hubp401_program_3dlut_fl_dlg_param, + .hubp_enable_3dlut_fl = hubp401_enable_3dlut_fl, + .hubp_program_3dlut_fl_addressing_mode = hubp401_program_3dlut_fl_addressing_mode, + .hubp_program_3dlut_fl_width = hubp401_program_3dlut_fl_width, + .hubp_program_3dlut_fl_tmz_protected = hubp401_program_3dlut_fl_tmz_protected, + .hubp_program_3dlut_fl_crossbar = hubp401_program_3dlut_fl_crossbar, + .hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done +}; + +bool hubp401_construct( + struct dcn20_hubp *hubp2, + struct dc_context *ctx, + uint32_t inst, + const struct dcn_hubp2_registers *hubp_regs, + const struct dcn_hubp2_shift *hubp_shift, + const struct dcn_hubp2_mask *hubp_mask) +{ + hubp2->base.funcs = &dcn401_hubp_funcs; + hubp2->base.ctx = ctx; + hubp2->hubp_regs = hubp_regs; + hubp2->hubp_shift = hubp_shift; + hubp2->hubp_mask = hubp_mask; + hubp2->base.inst = inst; + hubp2->base.opp_id = OPP_ID_INVALID; + hubp2->base.mpcc_id = 0xf; + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubp.h new file mode 100644 index 000000000000..e0cec898a2c0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_hubp.h @@ -0,0 +1,331 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_HUBP_DCN401_H__ +#define __DC_HUBP_DCN401_H__ + +#include "dcn20/dcn20_hubp.h" +#include "dcn21/dcn21_hubp.h" +#include "dcn30/dcn30_hubp.h" +#include "dcn31/dcn31_hubp.h" +#include "dcn32/dcn32_hubp.h" +#include "dml2/dml21/inc/dml_top_dchub_registers.h" + +#define HUBP_3DLUT_FL_REG_LIST_DCN401(inst)\ + SRI_ARR_US(_3DLUT_FL_CONFIG, HUBP, inst),\ + SRI_ARR_US(_3DLUT_FL_BIAS_SCALE, HUBP, inst),\ + SRI_ARR(HUBP_3DLUT_ADDRESS_HIGH, CURSOR0_, inst),\ + SRI_ARR(HUBP_3DLUT_ADDRESS_LOW, CURSOR0_, inst),\ + SRI_ARR(HUBP_3DLUT_CONTROL, CURSOR0_, inst),\ + SRI_ARR(HUBP_3DLUT_DLG_PARAM, CURSOR0_, inst) + +#define HUBP_MASK_SH_LIST_DCN401(mask_sh)\ + HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, REFCYC_PER_VM_DMDATA, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_FAULT_STATUS, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_FAULT_STATUS_CLEAR, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_UNDERFLOW_STATUS, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_LATE_STATUS, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_UNDERFLOW_STATUS_CLEAR, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_DONE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_BLANK_EN, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_TTU_DISABLE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNDERFLOW_STATUS, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNDERFLOW_CLEAR, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_NO_OUTSTANDING_REQ, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VTG_SEL, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNBOUNDED_REQ_MODE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_IN_BLANK, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_SOFT_RESET, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_PIPES, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, PIPE_INTERLEAVE, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, MAX_COMPRESSED_FRAGS, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_PKRS, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_TILING_CONFIG, SW_MODE, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_PITCH_C, PITCH_C, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SURFACE_CONFIG, SURFACE_PIXEL_FORMAT, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, SURFACE_FLIP_TYPE, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, 
SURFACE_FLIP_IN_STEREOSYNC, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, SURFACE_FLIP_PENDING, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, SURFACE_UPDATE_LOCK, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START, PRI_VIEWPORT_X_START, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START, PRI_VIEWPORT_Y_START, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION, SEC_VIEWPORT_WIDTH, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION, SEC_VIEWPORT_HEIGHT, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START, SEC_VIEWPORT_X_START, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START, SEC_VIEWPORT_Y_START, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_C, PRI_VIEWPORT_WIDTH_C, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_C, PRI_VIEWPORT_HEIGHT_C, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START_C, PRI_VIEWPORT_X_START_C, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START_C, PRI_VIEWPORT_Y_START_C, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION_C, SEC_VIEWPORT_WIDTH_C, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION_C, SEC_VIEWPORT_HEIGHT_C, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START_C, SEC_VIEWPORT_X_START_C, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START_C, SEC_VIEWPORT_Y_START_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, PRIMARY_SURFACE_ADDRESS_HIGH, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS, PRIMARY_SURFACE_ADDRESS, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, SECONDARY_SURFACE_ADDRESS_HIGH, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS, SECONDARY_SURFACE_ADDRESS, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, PRIMARY_SURFACE_ADDRESS_HIGH_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_C, PRIMARY_SURFACE_ADDRESS_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, SECONDARY_SURFACE_ADDRESS_HIGH_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_C, SECONDARY_SURFACE_ADDRESS_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE, SURFACE_INUSE_ADDRESS, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE_HIGH, SURFACE_INUSE_ADDRESS_HIGH, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE_C, SURFACE_INUSE_ADDRESS_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE_HIGH_C, SURFACE_INUSE_ADDRESS_HIGH_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_EARLIEST_INUSE, SURFACE_EARLIEST_INUSE_ADDRESS, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_EARLIEST_INUSE_HIGH, SURFACE_EARLIEST_INUSE_ADDRESS_HIGH, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_EARLIEST_INUSE_C, SURFACE_EARLIEST_INUSE_ADDRESS_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C, SURFACE_EARLIEST_INUSE_ADDRESS_HIGH_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_TMZ, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_TMZ_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_DCC_EN, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_TMZ, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_TMZ_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_EN, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_INT_MASK, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_CONTROL, 
DET_BUF_PLANE1_BASE_ADDRESS, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CB_B, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CR_R, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_Y_G, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_ALPHA, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_CONTROL, PACK_3TO2_ELEMENT_DISABLE, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_EXPANSION_MODE, DRQ_EXPANSION_MODE, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_EXPANSION_MODE, PRQ_EXPANSION_MODE, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_EXPANSION_MODE, MRQ_EXPANSION_MODE, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_EXPANSION_MODE, CRQ_EXPANSION_MODE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, CHUNK_SIZE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, MIN_CHUNK_SIZE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, DPTE_GROUP_SIZE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, PTE_ROW_HEIGHT_LINEAR, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, CHUNK_SIZE_C, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, MIN_CHUNK_SIZE_C, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, DPTE_GROUP_SIZE_C, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, SWATH_HEIGHT_C, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, PTE_ROW_HEIGHT_LINEAR_C, mask_sh),\ + HUBP_SF(HUBPREQ0_BLANK_OFFSET_0, REFCYC_H_BLANK_END, mask_sh),\ + HUBP_SF(HUBPREQ0_BLANK_OFFSET_0, DLG_V_BLANK_END, mask_sh),\ + HUBP_SF(HUBPREQ0_BLANK_OFFSET_1, MIN_DST_Y_NEXT_START, mask_sh),\ + HUBP_SF(HUBPREQ0_DST_DIMENSIONS, REFCYC_PER_HTOTAL, mask_sh),\ + HUBP_SF(HUBPREQ0_DST_AFTER_SCALER, REFCYC_X_AFTER_SCALER, mask_sh),\ + HUBP_SF(HUBPREQ0_DST_AFTER_SCALER, DST_Y_AFTER_SCALER, mask_sh),\ + HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_0, DST_Y_PER_VM_VBLANK, mask_sh),\ + HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_0, DST_Y_PER_ROW_VBLANK, mask_sh),\ + HUBP_SF(HUBPREQ0_REF_FREQ_TO_PIX_FREQ, REF_FREQ_TO_PIX_FREQ, mask_sh),\ + HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_1, REFCYC_PER_PTE_GROUP_VBLANK_L, mask_sh),\ + HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_3, REFCYC_PER_META_CHUNK_VBLANK_L, mask_sh),\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_4, DST_Y_PER_META_ROW_NOM_L, mask_sh),\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_5, REFCYC_PER_META_CHUNK_NOM_L, mask_sh),\ + HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY_PRE, REFCYC_PER_LINE_DELIVERY_PRE_L, mask_sh),\ + HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY_PRE, REFCYC_PER_LINE_DELIVERY_PRE_C, mask_sh),\ + HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY, REFCYC_PER_LINE_DELIVERY_L, mask_sh),\ + HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY, REFCYC_PER_LINE_DELIVERY_C, mask_sh),\ + HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_2, REFCYC_PER_PTE_GROUP_VBLANK_C, mask_sh),\ + HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_4, REFCYC_PER_META_CHUNK_VBLANK_C, mask_sh),\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_6, DST_Y_PER_META_ROW_NOM_C, mask_sh),\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_7, REFCYC_PER_META_CHUNK_NOM_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_TTU_QOS_WM, QoS_LEVEL_LOW_WM, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_TTU_QOS_WM, QoS_LEVEL_HIGH_WM, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_GLOBAL_TTU_CNTL, MIN_TTU_VBLANK, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_GLOBAL_TTU_CNTL, QoS_LEVEL_FLIP, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_GLOBAL_TTU_CNTL, ROW_TTU_MODE, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, REFCYC_PER_REQ_DELIVERY, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_LEVEL_FIXED, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_RAMP_DISABLE, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh),\ + 
HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh),\ + HUBP_MASK_SH_LIST_DCN_VM(mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SURFACE_CONFIG, ROTATION_ANGLE, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SURFACE_CONFIG, H_MIRROR_EN, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_SURFACE_CONFIG, ALPHA_PLANE_EN, mask_sh),\ + HUBP_SF(HUBPREQ0_PREFETCH_SETTINGS, DST_Y_PREFETCH, mask_sh),\ + HUBP_SF(HUBPREQ0_PREFETCH_SETTINGS, VRATIO_PREFETCH, mask_sh),\ + HUBP_SF(HUBPREQ0_PREFETCH_SETTINGS_C, VRATIO_PREFETCH_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_VM_SYSTEM_APERTURE_LOW_ADDR, MC_VM_SYSTEM_APERTURE_LOW_ADDR, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_VM_SYSTEM_APERTURE_HIGH_ADDR, MC_VM_SYSTEM_APERTURE_HIGH_ADDR, mask_sh),\ + HUBP_SF(HUBPREQ0_CURSOR_SETTINGS, CURSOR0_DST_Y_OFFSET, mask_sh), \ + HUBP_SF(HUBPREQ0_CURSOR_SETTINGS, CURSOR0_CHUNK_HDL_ADJUST, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_SURFACE_ADDRESS_HIGH, CURSOR_SURFACE_ADDRESS_HIGH, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_SURFACE_ADDRESS, CURSOR_SURFACE_ADDRESS, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_SIZE, CURSOR_WIDTH, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_SIZE, CURSOR_HEIGHT, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_MODE, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_REQ_MODE, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_2X_MAGNIFY, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_PITCH, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_LINES_PER_CHUNK, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_ENABLE, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_POSITION, CURSOR_X_POSITION, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_POSITION, CURSOR_Y_POSITION, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_HOT_SPOT, CURSOR_HOT_SPOT_X, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_HOT_SPOT, CURSOR_HOT_SPOT_Y, mask_sh), \ + HUBP_SF(CURSOR0_0_CURSOR_DST_OFFSET, CURSOR_DST_X_OFFSET, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_ADDRESS_HIGH, DMDATA_ADDRESS_HIGH, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_CNTL, DMDATA_MODE, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_CNTL, DMDATA_UPDATED, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_CNTL, DMDATA_REPEAT, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_CNTL, DMDATA_SIZE, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_SW_CNTL, DMDATA_SW_UPDATED, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_SW_CNTL, DMDATA_SW_REPEAT, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_SW_CNTL, DMDATA_SW_SIZE, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_QOS_CNTL, DMDATA_QOS_MODE, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_QOS_CNTL, DMDATA_QOS_LEVEL, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_QOS_CNTL, DMDATA_DL_DELTA, mask_sh), \ + HUBP_SF(CURSOR0_0_DMDATA_STATUS, DMDATA_DONE, mask_sh),\ + HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_0, DST_Y_PER_VM_FLIP, mask_sh),\ + HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_0, DST_Y_PER_ROW_FLIP, mask_sh),\ + HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_1, REFCYC_PER_PTE_GROUP_FLIP_L, mask_sh),\ + HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_2, REFCYC_PER_META_CHUNK_FLIP_L, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_DISABLE_STOP_DATA_DURING_VM, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, HUBPREQ_MASTER_UPDATE_LOCK_STATUS, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL2, SURFACE_GSL_ENABLE, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL2, SURFACE_TRIPLE_BUFFER_ENABLE, mask_sh),\ + HUBP_SF(HUBPREQ0_VMID_SETTINGS_0, VMID, mask_sh),\ + HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_3, REFCYC_PER_VM_GROUP_FLIP, mask_sh),\ + HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_4, REFCYC_PER_VM_REQ_FLIP, mask_sh),\ + HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_5, 
REFCYC_PER_PTE_GROUP_FLIP_C, mask_sh),\ + HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_6, REFCYC_PER_META_CHUNK_FLIP_C, mask_sh),\ + HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_5, REFCYC_PER_VM_GROUP_VBLANK, mask_sh),\ + HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_6, REFCYC_PER_VM_REQ_VBLANK, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MALL_CONFIG, USE_MALL_SEL, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_VMPG_CONFIG, VMPG_SIZE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_VMPG_CONFIG, PTE_BUFFER_MODE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_VMPG_CONFIG, BIGK_FRAGMENT_SIZE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_VMPG_CONFIG, FORCE_ONE_ROW_FOR_FRAME, mask_sh),\ + HUBP_SF(HUBPREQ0_UCLK_PSTATE_FORCE, DATA_UCLK_PSTATE_FORCE_EN, mask_sh),\ + HUBP_SF(HUBPREQ0_UCLK_PSTATE_FORCE, DATA_UCLK_PSTATE_FORCE_VALUE, mask_sh),\ + HUBP_SF(HUBPREQ0_UCLK_PSTATE_FORCE, CURSOR_UCLK_PSTATE_FORCE_EN, mask_sh),\ + HUBP_SF(HUBPREQ0_UCLK_PSTATE_FORCE, CURSOR_UCLK_PSTATE_FORCE_VALUE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MALL_CONFIG, MALL_PREF_CMD_TYPE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MALL_CONFIG, MALL_PREF_MODE, mask_sh),\ + HUBP_SF(HUBP0_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_MODE, mask_sh),\ + HUBP_SF(HUBP0_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_FORMAT, mask_sh),\ + HUBP_SF(HUBP0_3DLUT_FL_BIAS_SCALE, HUBP0_3DLUT_FL_BIAS, mask_sh),\ + HUBP_SF(HUBP0_3DLUT_FL_BIAS_SCALE, HUBP0_3DLUT_FL_SCALE, mask_sh),\ + HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_ENABLE, mask_sh),\ + HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_DONE, mask_sh),\ + HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_ADDRESSING_MODE, mask_sh),\ + HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_WIDTH, mask_sh),\ + HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_TMZ, mask_sh),\ + HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_CROSSBAR_SELECT_Y_G, mask_sh),\ + HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_CROSSBAR_SELECT_CB_B, mask_sh),\ + HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_CROSSBAR_SELECT_CR_R, mask_sh),\ + HUBP_SF(CURSOR0_0_HUBP_3DLUT_ADDRESS_HIGH, HUBP_3DLUT_ADDRESS_HIGH, mask_sh),\ + HUBP_SF(CURSOR0_0_HUBP_3DLUT_ADDRESS_LOW, HUBP_3DLUT_ADDRESS_LOW, mask_sh),\ + HUBP_SF(CURSOR0_0_HUBP_3DLUT_DLG_PARAM, REFCYC_PER_3DLUT_GROUP, mask_sh),\ + +void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); + +void hubp401_vready_at_or_After_vsync(struct hubp *hubp, + struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); + +void hubp401_program_requestor( + struct hubp *hubp, + struct _vcs_dpi_display_rq_regs_st *rq_regs); + +void hubp401_program_deadline( + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_attr, + struct _vcs_dpi_display_ttu_regs_st *ttu_attr); + +void hubp401_setup( + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_attr, + struct _vcs_dpi_display_ttu_regs_st *ttu_attr, + struct _vcs_dpi_display_rq_regs_st *rq_regs, + struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); + +void hubp401_setup_interdependent( + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_attr, + struct _vcs_dpi_display_ttu_regs_st *ttu_attr); + +bool hubp401_program_surface_flip_and_addr( + struct hubp *hubp, + const struct dc_plane_address *address, + bool flip_immediate); + +void hubp401_dcc_control(struct hubp *hubp, + struct dc_plane_dcc_param *dcc); + +void hubp401_program_tiling( + struct dcn20_hubp *hubp2, + const union dc_tiling_info *info, + const enum surface_pixel_format pixel_format); + +void hubp401_program_size( + 
struct hubp *hubp, + enum surface_pixel_format format, + const struct plane_size *plane_size, + struct dc_plane_dcc_param *dcc); + +void hubp401_program_surface_config( + struct hubp *hubp, + enum surface_pixel_format format, + union dc_tiling_info *tiling_info, + struct plane_size *plane_size, + enum dc_rotation_angle rotation, + struct dc_plane_dcc_param *dcc, + bool horizontal_mirror, + unsigned int compat_level); + +void hubp401_set_viewport(struct hubp *hubp, + const struct rect *viewport, + const struct rect *viewport_c); + +void hubp401_set_flip_int(struct hubp *hubp); + +bool hubp401_in_blank(struct hubp *hubp); + +void hubp401_cursor_set_position( + struct hubp *hubp, + const struct dc_cursor_position *pos, + const struct dc_cursor_mi_param *param); + +void hubp401_read_state(struct hubp *hubp); + +bool hubp401_construct( + struct dcn20_hubp *hubp2, + struct dc_context *ctx, + uint32_t inst, + const struct dcn_hubp2_registers *hubp_regs, + const struct dcn_hubp2_shift *hubp_shift, + const struct dcn_hubp2_mask *hubp_mask); + +void hubp401_init(struct hubp *hubp); + +int hubp401_get_3dlut_fl_done(struct hubp *hubp); + +void hubp401_set_unbounded_requesting(struct hubp *hubp, bool enable); + +#endif /* __DC_HUBP_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_mpc.c new file mode 100644 index 000000000000..ba5f1dec8b68 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_mpc.c @@ -0,0 +1,645 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "reg_helper.h" +#include "dc.h" +#include "dcn401_mpc.h" +#include "dcn10/dcn10_cm_common.h" +#include "basics/conversion.h" +#include "mpc.h" + +#define REG(reg)\ + mpc401->mpc_regs->reg + +#define CTX \ + mpc401->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + mpc401->mpc_shift->field_name, mpc401->mpc_mask->field_name + +static void mpc401_update_3dlut_fast_load_select(struct mpc *mpc, int mpcc_id, int hubp_idx) +{ + struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); + + REG_SET(MPCC_MCM_3DLUT_FAST_LOAD_SELECT[mpcc_id], 0, MPCC_MCM_3DLUT_FL_SEL, hubp_idx); +} + +static void mpc401_get_3dlut_fast_load_status(struct mpc *mpc, int mpcc_id, uint32_t *done, uint32_t *soft_underflow, uint32_t *hard_underflow) +{ + struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); + + REG_GET_3(MPCC_MCM_3DLUT_FAST_LOAD_STATUS[mpcc_id], + MPCC_MCM_3DLUT_FL_DONE, done, + MPCC_MCM_3DLUT_FL_SOFT_UNDERFLOW, soft_underflow, + MPCC_MCM_3DLUT_FL_HARD_UNDERFLOW, hard_underflow); +} + +void mpc401_set_movable_cm_location(struct mpc *mpc, enum mpcc_movable_cm_location location, int mpcc_id) +{ + struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); + + switch (location) { + case MPCC_MOVABLE_CM_LOCATION_BEFORE: + REG_UPDATE(MPCC_MOVABLE_CM_LOCATION_CONTROL[mpcc_id], + MPCC_MOVABLE_CM_LOCATION_CNTL, 0); + break; + case MPCC_MOVABLE_CM_LOCATION_AFTER: + REG_UPDATE(MPCC_MOVABLE_CM_LOCATION_CONTROL[mpcc_id], + MPCC_MOVABLE_CM_LOCATION_CNTL, 1); + break; + } +} + +static enum dc_lut_mode get3dlut_config( + struct mpc *mpc, + bool *is_17x17x17, + bool *is_12bits_color_channel, + int mpcc_id) +{ + uint32_t i_mode, i_enable_10bits, lut_size; + enum dc_lut_mode mode; + struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); + + REG_GET(MPCC_MCM_3DLUT_MODE[mpcc_id], + MPCC_MCM_3DLUT_MODE_CURRENT, &i_mode); + + REG_GET(MPCC_MCM_3DLUT_READ_WRITE_CONTROL[mpcc_id], + MPCC_MCM_3DLUT_30BIT_EN, &i_enable_10bits); + + switch (i_mode) { + case 0: + mode = LUT_BYPASS; + break; + case 1: + mode = LUT_RAM_A; + break; + case 2: + mode = LUT_RAM_B; + break; + default: + mode = LUT_BYPASS; + break; + } + if (i_enable_10bits > 0) + *is_12bits_color_channel = false; + else + *is_12bits_color_channel = true; + + REG_GET(MPCC_MCM_3DLUT_MODE[mpcc_id], MPCC_MCM_3DLUT_SIZE, &lut_size); + + if (lut_size == 0) + *is_17x17x17 = true; + else + *is_17x17x17 = false; + + return mode; +} + +void mpc401_populate_lut(struct mpc *mpc, const enum MCM_LUT_ID id, const union mcm_lut_params params, bool lut_bank_a, int mpcc_id) +{ + const enum dc_lut_mode next_mode = lut_bank_a ? 
LUT_RAM_A : LUT_RAM_B; + const struct pwl_params *lut1d = params.pwl; + const struct pwl_params *lut_shaper = params.pwl; + bool is_17x17x17; + bool is_12bits_color_channel; + const struct dc_rgb *lut0; + const struct dc_rgb *lut1; + const struct dc_rgb *lut2; + const struct dc_rgb *lut3; + int lut_size0; + int lut_size; + const struct tetrahedral_params *lut3d = params.lut3d; + + switch (id) { + case MCM_LUT_1DLUT: + if (lut1d == NULL) + return; + + mpc32_power_on_blnd_lut(mpc, mpcc_id, true); + mpc32_configure_post1dlut(mpc, mpcc_id, next_mode == LUT_RAM_A); + + if (next_mode == LUT_RAM_A) + mpc32_program_post1dluta_settings(mpc, mpcc_id, lut1d); + else + mpc32_program_post1dlutb_settings(mpc, mpcc_id, lut1d); + + mpc32_program_post1dlut_pwl( + mpc, mpcc_id, lut1d->rgb_resulted, lut1d->hw_points_num); + + break; + case MCM_LUT_SHAPER: + if (lut_shaper == NULL) + return; + if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc) + mpc32_power_on_shaper_3dlut(mpc, mpcc_id, true); + + mpc32_configure_shaper_lut(mpc, next_mode == LUT_RAM_A, mpcc_id); + + if (next_mode == LUT_RAM_A) + mpc32_program_shaper_luta_settings(mpc, lut_shaper, mpcc_id); + else + mpc32_program_shaper_lutb_settings(mpc, lut_shaper, mpcc_id); + + mpc32_program_shaper_lut( + mpc, lut_shaper->rgb_resulted, lut_shaper->hw_points_num, mpcc_id); + + mpc32_power_on_shaper_3dlut(mpc, mpcc_id, false); + break; + case MCM_LUT_3DLUT: + if (lut3d == NULL) + return; + + mpc32_power_on_shaper_3dlut(mpc, mpcc_id, true); + + get3dlut_config(mpc, &is_17x17x17, &is_12bits_color_channel, mpcc_id); + + is_17x17x17 = !lut3d->use_tetrahedral_9; + is_12bits_color_channel = lut3d->use_12bits; + if (is_17x17x17) { + lut0 = lut3d->tetrahedral_17.lut0; + lut1 = lut3d->tetrahedral_17.lut1; + lut2 = lut3d->tetrahedral_17.lut2; + lut3 = lut3d->tetrahedral_17.lut3; + lut_size0 = sizeof(lut3d->tetrahedral_17.lut0)/ + sizeof(lut3d->tetrahedral_17.lut0[0]); + lut_size = sizeof(lut3d->tetrahedral_17.lut1)/ + sizeof(lut3d->tetrahedral_17.lut1[0]); + } else { + lut0 = lut3d->tetrahedral_9.lut0; + lut1 = lut3d->tetrahedral_9.lut1; + lut2 = lut3d->tetrahedral_9.lut2; + lut3 = lut3d->tetrahedral_9.lut3; + lut_size0 = sizeof(lut3d->tetrahedral_9.lut0)/ + sizeof(lut3d->tetrahedral_9.lut0[0]); + lut_size = sizeof(lut3d->tetrahedral_9.lut1)/ + sizeof(lut3d->tetrahedral_9.lut1[0]); + } + + mpc32_select_3dlut_ram(mpc, next_mode, + is_12bits_color_channel, mpcc_id); + mpc32_select_3dlut_ram_mask(mpc, 0x1, mpcc_id); + if (is_12bits_color_channel) + mpc32_set3dlut_ram12(mpc, lut0, lut_size0, mpcc_id); + else + mpc32_set3dlut_ram10(mpc, lut0, lut_size0, mpcc_id); + + mpc32_select_3dlut_ram_mask(mpc, 0x2, mpcc_id); + if (is_12bits_color_channel) + mpc32_set3dlut_ram12(mpc, lut1, lut_size, mpcc_id); + else + mpc32_set3dlut_ram10(mpc, lut1, lut_size, mpcc_id); + + mpc32_select_3dlut_ram_mask(mpc, 0x4, mpcc_id); + if (is_12bits_color_channel) + mpc32_set3dlut_ram12(mpc, lut2, lut_size, mpcc_id); + else + mpc32_set3dlut_ram10(mpc, lut2, lut_size, mpcc_id); + + mpc32_select_3dlut_ram_mask(mpc, 0x8, mpcc_id); + if (is_12bits_color_channel) + mpc32_set3dlut_ram12(mpc, lut3, lut_size, mpcc_id); + else + mpc32_set3dlut_ram10(mpc, lut3, lut_size, mpcc_id); + + if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc) + mpc32_power_on_shaper_3dlut(mpc, mpcc_id, false); + + break; + } + +} + +void mpc401_program_lut_mode( + struct mpc *mpc, + const enum MCM_LUT_ID id, + const enum MCM_LUT_XABLE xable, + bool lut_bank_a, + int mpcc_id) +{ + struct dcn401_mpc *mpc401 = 
TO_DCN401_MPC(mpc); + + switch (id) { + case MCM_LUT_3DLUT: + switch (xable) { + case MCM_LUT_DISABLE: + REG_UPDATE(MPCC_MCM_3DLUT_MODE[mpcc_id], MPCC_MCM_3DLUT_MODE, 0); + break; + case MCM_LUT_ENABLE: + REG_UPDATE(MPCC_MCM_3DLUT_MODE[mpcc_id], MPCC_MCM_3DLUT_MODE, lut_bank_a ? 1 : 2); + break; + } + break; + case MCM_LUT_SHAPER: + switch (xable) { + case MCM_LUT_DISABLE: + REG_UPDATE(MPCC_MCM_SHAPER_CONTROL[mpcc_id], MPCC_MCM_SHAPER_LUT_MODE, 0); + break; + case MCM_LUT_ENABLE: + REG_UPDATE(MPCC_MCM_SHAPER_CONTROL[mpcc_id], MPCC_MCM_SHAPER_LUT_MODE, lut_bank_a ? 1 : 2); + break; + } + break; + case MCM_LUT_1DLUT: + switch (xable) { + case MCM_LUT_DISABLE: + REG_UPDATE(MPCC_MCM_1DLUT_CONTROL[mpcc_id], + MPCC_MCM_1DLUT_MODE, 0); + break; + case MCM_LUT_ENABLE: + REG_UPDATE(MPCC_MCM_1DLUT_CONTROL[mpcc_id], + MPCC_MCM_1DLUT_MODE, 2); + break; + } + REG_UPDATE(MPCC_MCM_1DLUT_CONTROL[mpcc_id], + MPCC_MCM_1DLUT_SELECT, lut_bank_a ? 0 : 1); + break; + } +} + +void mpc401_program_lut_read_write_control(struct mpc *mpc, const enum MCM_LUT_ID id, bool lut_bank_a, int mpcc_id) +{ + struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); + + switch (id) { + case MCM_LUT_3DLUT: + mpc32_select_3dlut_ram_mask(mpc, 0xf, mpcc_id); + REG_UPDATE(MPCC_MCM_3DLUT_READ_WRITE_CONTROL[mpcc_id], MPCC_MCM_3DLUT_RAM_SEL, lut_bank_a ? 0 : 1); + break; + case MCM_LUT_SHAPER: + mpc32_configure_shaper_lut(mpc, lut_bank_a, mpcc_id); + break; + case MCM_LUT_1DLUT: + mpc32_configure_post1dlut(mpc, lut_bank_a, mpcc_id); + break; + } +} + +static void program_gamut_remap( + struct mpc *mpc, + unsigned int mpcc_id, + const uint16_t *regval, + enum mpcc_gamut_remap_id gamut_remap_block_id, + enum mpcc_gamut_remap_mode_select mode_select) +{ + struct color_matrices_reg gamut_regs; + struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); + + switch (gamut_remap_block_id) { + case MPCC_OGAM_GAMUT_REMAP: + + if (regval == NULL || mode_select == MPCC_GAMUT_REMAP_MODE_SELECT_0) { + REG_SET(MPCC_GAMUT_REMAP_MODE[mpcc_id], 0, + MPCC_GAMUT_REMAP_MODE, mode_select); + return; + } + + gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_GAMUT_REMAP_C11_A; + gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_GAMUT_REMAP_C11_A; + gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_GAMUT_REMAP_C12_A; + gamut_regs.masks.csc_c12 = mpc401->mpc_mask->MPCC_GAMUT_REMAP_C12_A; + + switch (mode_select) { + case MPCC_GAMUT_REMAP_MODE_SELECT_1: + gamut_regs.csc_c11_c12 = REG(MPC_GAMUT_REMAP_C11_C12_A[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_GAMUT_REMAP_C33_C34_A[mpcc_id]); + break; + case MPCC_GAMUT_REMAP_MODE_SELECT_2: + gamut_regs.csc_c11_c12 = REG(MPC_GAMUT_REMAP_C11_C12_B[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_GAMUT_REMAP_C33_C34_B[mpcc_id]); + break; + default: + break; + } + + cm_helper_program_color_matrices( + mpc->ctx, + regval, + &gamut_regs); + + //select coefficient set to use, set A (MODE_1) or set B (MODE_2) + REG_SET(MPCC_GAMUT_REMAP_MODE[mpcc_id], 0, MPCC_GAMUT_REMAP_MODE, mode_select); + break; + + case MPCC_MCM_FIRST_GAMUT_REMAP: + if (regval == NULL || mode_select == MPCC_GAMUT_REMAP_MODE_SELECT_0) { + REG_SET(MPCC_MCM_FIRST_GAMUT_REMAP_MODE[mpcc_id], 0, + MPCC_MCM_FIRST_GAMUT_REMAP_MODE, mode_select); + return; + } + + gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_MCM_FIRST_GAMUT_REMAP_C11_A; + gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_MCM_FIRST_GAMUT_REMAP_C11_A; + gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_MCM_FIRST_GAMUT_REMAP_C12_A; + gamut_regs.masks.csc_c12 = 
mpc401->mpc_mask->MPCC_MCM_FIRST_GAMUT_REMAP_C12_A; + + switch (mode_select) { + case MPCC_GAMUT_REMAP_MODE_SELECT_1: + gamut_regs.csc_c11_c12 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A[mpcc_id]); + break; + case MPCC_GAMUT_REMAP_MODE_SELECT_2: + gamut_regs.csc_c11_c12 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_B[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_B[mpcc_id]); + break; + default: + break; + } + + cm_helper_program_color_matrices( + mpc->ctx, + regval, + &gamut_regs); + + //select coefficient set to use, set A (MODE_1) or set B (MODE_2) + REG_SET(MPCC_MCM_FIRST_GAMUT_REMAP_MODE[mpcc_id], 0, + MPCC_MCM_FIRST_GAMUT_REMAP_MODE, mode_select); + break; + + case MPCC_MCM_SECOND_GAMUT_REMAP: + if (regval == NULL || mode_select == MPCC_GAMUT_REMAP_MODE_SELECT_0) { + REG_SET(MPCC_MCM_SECOND_GAMUT_REMAP_MODE[mpcc_id], 0, + MPCC_MCM_SECOND_GAMUT_REMAP_MODE, mode_select); + return; + } + + gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_MCM_SECOND_GAMUT_REMAP_C11_A; + gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_MCM_SECOND_GAMUT_REMAP_C11_A; + gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_MCM_SECOND_GAMUT_REMAP_C12_A; + gamut_regs.masks.csc_c12 = mpc401->mpc_mask->MPCC_MCM_SECOND_GAMUT_REMAP_C12_A; + + switch (mode_select) { + case MPCC_GAMUT_REMAP_MODE_SELECT_1: + gamut_regs.csc_c11_c12 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A[mpcc_id]); + break; + case MPCC_GAMUT_REMAP_MODE_SELECT_2: + gamut_regs.csc_c11_c12 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_B[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_B[mpcc_id]); + break; + default: + break; + } + + cm_helper_program_color_matrices( + mpc->ctx, + regval, + &gamut_regs); + + //select coefficient set to use, set A (MODE_1) or set B (MODE_2) + REG_SET(MPCC_MCM_SECOND_GAMUT_REMAP_MODE[mpcc_id], 0, + MPCC_MCM_SECOND_GAMUT_REMAP_MODE, mode_select); + break; + + default: + break; + } +} + +void mpc401_set_gamut_remap( + struct mpc *mpc, + int mpcc_id, + const struct mpc_grph_gamut_adjustment *adjust) +{ + struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); + unsigned int i = 0; + uint32_t mode_select = 0; + + if (adjust->gamut_adjust_type != GRAPHICS_GAMUT_ADJUST_TYPE_SW) { + /* Bypass / Disable if type is bypass or hw */ + program_gamut_remap(mpc, mpcc_id, NULL, + adjust->mpcc_gamut_remap_block_id, MPCC_GAMUT_REMAP_MODE_SELECT_0); + } else { + struct fixed31_32 arr_matrix[12]; + uint16_t arr_reg_val[12]; + + for (i = 0; i < 12; i++) + arr_matrix[i] = adjust->temperature_matrix[i]; + + convert_float_matrix(arr_reg_val, arr_matrix, 12); + + switch (adjust->mpcc_gamut_remap_block_id) { + case MPCC_OGAM_GAMUT_REMAP: + REG_GET(MPCC_GAMUT_REMAP_MODE[mpcc_id], + MPCC_GAMUT_REMAP_MODE_CURRENT, &mode_select); + break; + case MPCC_MCM_FIRST_GAMUT_REMAP: + REG_GET(MPCC_MCM_FIRST_GAMUT_REMAP_MODE[mpcc_id], + MPCC_MCM_FIRST_GAMUT_REMAP_MODE_CURRENT, &mode_select); + break; + case MPCC_MCM_SECOND_GAMUT_REMAP: + REG_GET(MPCC_MCM_SECOND_GAMUT_REMAP_MODE[mpcc_id], + MPCC_MCM_SECOND_GAMUT_REMAP_MODE_CURRENT, &mode_select); + break; + default: + break; + } + + //If current set in use not set A (MODE_1), then use set A, otherwise use set B + if (mode_select != MPCC_GAMUT_REMAP_MODE_SELECT_1) + mode_select = MPCC_GAMUT_REMAP_MODE_SELECT_1; + else + mode_select = MPCC_GAMUT_REMAP_MODE_SELECT_2; + + program_gamut_remap(mpc, mpcc_id, arr_reg_val, + 
adjust->mpcc_gamut_remap_block_id, mode_select); + } +} + +static void read_gamut_remap(struct mpc *mpc, + int mpcc_id, + uint16_t *regval, + enum mpcc_gamut_remap_id gamut_remap_block_id, + uint32_t *mode_select) +{ + struct color_matrices_reg gamut_regs; + struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); + + switch (gamut_remap_block_id) { + case MPCC_OGAM_GAMUT_REMAP: + //current coefficient set in use + REG_GET(MPCC_GAMUT_REMAP_MODE[mpcc_id], MPCC_GAMUT_REMAP_MODE_CURRENT, mode_select); + + gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_GAMUT_REMAP_C11_A; + gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_GAMUT_REMAP_C11_A; + gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_GAMUT_REMAP_C12_A; + gamut_regs.masks.csc_c12 = mpc401->mpc_mask->MPCC_GAMUT_REMAP_C12_A; + + switch (*mode_select) { + case MPCC_GAMUT_REMAP_MODE_SELECT_1: + gamut_regs.csc_c11_c12 = REG(MPC_GAMUT_REMAP_C11_C12_A[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_GAMUT_REMAP_C33_C34_A[mpcc_id]); + break; + case MPCC_GAMUT_REMAP_MODE_SELECT_2: + gamut_regs.csc_c11_c12 = REG(MPC_GAMUT_REMAP_C11_C12_B[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_GAMUT_REMAP_C33_C34_B[mpcc_id]); + break; + default: + break; + } + break; + + case MPCC_MCM_FIRST_GAMUT_REMAP: + REG_GET(MPCC_MCM_FIRST_GAMUT_REMAP_MODE[mpcc_id], + MPCC_MCM_FIRST_GAMUT_REMAP_MODE_CURRENT, mode_select); + + gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_MCM_FIRST_GAMUT_REMAP_C11_A; + gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_MCM_FIRST_GAMUT_REMAP_C11_A; + gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_MCM_FIRST_GAMUT_REMAP_C12_A; + gamut_regs.masks.csc_c12 = mpc401->mpc_mask->MPCC_MCM_FIRST_GAMUT_REMAP_C12_A; + + switch (*mode_select) { + case MPCC_GAMUT_REMAP_MODE_SELECT_1: + gamut_regs.csc_c11_c12 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A[mpcc_id]); + break; + case MPCC_GAMUT_REMAP_MODE_SELECT_2: + gamut_regs.csc_c11_c12 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_B[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_B[mpcc_id]); + break; + default: + break; + } + break; + + case MPCC_MCM_SECOND_GAMUT_REMAP: + REG_GET(MPCC_MCM_SECOND_GAMUT_REMAP_MODE[mpcc_id], + MPCC_MCM_SECOND_GAMUT_REMAP_MODE_CURRENT, mode_select); + + gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_MCM_SECOND_GAMUT_REMAP_C11_A; + gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_MCM_SECOND_GAMUT_REMAP_C11_A; + gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_MCM_SECOND_GAMUT_REMAP_C12_A; + gamut_regs.masks.csc_c12 = mpc401->mpc_mask->MPCC_MCM_SECOND_GAMUT_REMAP_C12_A; + + switch (*mode_select) { + case MPCC_GAMUT_REMAP_MODE_SELECT_1: + gamut_regs.csc_c11_c12 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A[mpcc_id]); + break; + case MPCC_GAMUT_REMAP_MODE_SELECT_2: + gamut_regs.csc_c11_c12 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_B[mpcc_id]); + gamut_regs.csc_c33_c34 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_B[mpcc_id]); + break; + default: + break; + } + break; + + default: + break; + } + + if (*mode_select != MPCC_GAMUT_REMAP_MODE_SELECT_0) { + cm_helper_read_color_matrices( + mpc401->base.ctx, + regval, + &gamut_regs); + } +} + +void mpc401_get_gamut_remap(struct mpc *mpc, + int mpcc_id, + struct mpc_grph_gamut_adjustment *adjust) +{ + uint16_t arr_reg_val[12]; + uint32_t mode_select; + + read_gamut_remap(mpc, mpcc_id, arr_reg_val, adjust->mpcc_gamut_remap_block_id, &mode_select); + + if 
(mode_select == MPCC_GAMUT_REMAP_MODE_SELECT_0) { + adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; + return; + } + + adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; + convert_hw_matrix(adjust->temperature_matrix, + arr_reg_val, ARRAY_SIZE(arr_reg_val)); +} + +static const struct mpc_funcs dcn401_mpc_funcs = { + .read_mpcc_state = mpc1_read_mpcc_state, + .insert_plane = mpc1_insert_plane, + .remove_mpcc = mpc1_remove_mpcc, + .mpc_init = mpc32_mpc_init, + .mpc_init_single_inst = mpc3_mpc_init_single_inst, + .update_blending = mpc2_update_blending, + .cursor_lock = mpc1_cursor_lock, + .get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp, + .wait_for_idle = mpc2_assert_idle_mpcc, + .assert_mpcc_idle_before_connect = mpc2_assert_mpcc_idle_before_connect, + .init_mpcc_list_from_hw = mpc1_init_mpcc_list_from_hw, + .set_denorm = mpc3_set_denorm, + .set_denorm_clamp = mpc3_set_denorm_clamp, + .set_output_csc = mpc3_set_output_csc, + .set_ocsc_default = mpc3_set_ocsc_default, + .set_output_gamma = mpc3_set_output_gamma, + .insert_plane_to_secondary = NULL, + .remove_mpcc_from_secondary = NULL, + .set_dwb_mux = mpc3_set_dwb_mux, + .disable_dwb_mux = mpc3_disable_dwb_mux, + .is_dwb_idle = mpc3_is_dwb_idle, + .set_gamut_remap = mpc401_set_gamut_remap, + .program_shaper = mpc32_program_shaper, + .program_3dlut = mpc32_program_3dlut, + .program_1dlut = mpc32_program_post1dlut, + .acquire_rmu = NULL, + .release_rmu = NULL, + .power_on_mpc_mem_pwr = mpc3_power_on_ogam_lut, + .get_mpc_out_mux = mpc1_get_mpc_out_mux, + .set_bg_color = mpc1_set_bg_color, + .set_movable_cm_location = mpc401_set_movable_cm_location, + .update_3dlut_fast_load_select = mpc401_update_3dlut_fast_load_select, + .get_3dlut_fast_load_status = mpc401_get_3dlut_fast_load_status, + .populate_lut = mpc401_populate_lut, + .program_lut_read_write_control = mpc401_program_lut_read_write_control, + .program_lut_mode = mpc401_program_lut_mode, +}; + + +void dcn401_mpc_construct(struct dcn401_mpc *mpc401, + struct dc_context *ctx, + const struct dcn401_mpc_registers *mpc_regs, + const struct dcn401_mpc_shift *mpc_shift, + const struct dcn401_mpc_mask *mpc_mask, + int num_mpcc, + int num_rmu) +{ + int i; + + mpc401->base.ctx = ctx; + + mpc401->base.funcs = &dcn401_mpc_funcs; + + mpc401->mpc_regs = mpc_regs; + mpc401->mpc_shift = mpc_shift; + mpc401->mpc_mask = mpc_mask; + + mpc401->mpcc_in_use_mask = 0; + mpc401->num_mpcc = num_mpcc; + mpc401->num_rmu = num_rmu; + + for (i = 0; i < MAX_MPCC; i++) + mpc3_init_mpcc(&mpc401->base.mpcc_array[i], i); +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_mpc.h new file mode 100644 index 000000000000..a8ef67695757 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn401/dcn401_mpc.h @@ -0,0 +1,234 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_MPCC_DCN401_H__ +#define __DC_MPCC_DCN401_H__ +#include "dcn30/dcn30_mpc.h" +#include "dcn32/dcn32_mpc.h" + +#define TO_DCN401_MPC(mpc_base) \ + container_of(mpc_base, struct dcn401_mpc, base) + +#define MPC_REG_VARIABLE_LIST_DCN4_01 \ + MPC_REG_VARIABLE_LIST_DCN3_0; \ + MPC_REG_VARIABLE_LIST_DCN32; \ + uint32_t MPCC_MCM_FIRST_GAMUT_REMAP_COEF_FORMAT[MAX_MPCC]; \ + uint32_t MPCC_MCM_FIRST_GAMUT_REMAP_MODE[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_A[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_A[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_A[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_A[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_B[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_B[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_B[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_B[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_B[MAX_MPCC]; \ + uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_B[MAX_MPCC]; \ + uint32_t MPCC_MCM_SECOND_GAMUT_REMAP_COEF_FORMAT[MAX_MPCC]; \ + uint32_t MPCC_MCM_SECOND_GAMUT_REMAP_MODE[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_A[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_A[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_A[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_A[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_B[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_B[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_B[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_B[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_B[MAX_MPCC]; \ + uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_B[MAX_MPCC]; \ + uint32_t MPCC_MCM_3DLUT_FAST_LOAD_SELECT[MAX_MPCC]; \ + uint32_t MPCC_MCM_3DLUT_FAST_LOAD_STATUS[MAX_MPCC] + +#define MPC_COMMON_MASK_SH_LIST_DCN4_01(mask_sh) \ + MPC_COMMON_MASK_SH_LIST_DCN32(mask_sh), \ + SF(MPCC_MCM0_MPCC_MCM_FIRST_GAMUT_REMAP_COEF_FORMAT, MPCC_MCM_FIRST_GAMUT_REMAP_COEF_FORMAT, mask_sh), \ + SF(MPCC_MCM0_MPCC_MCM_FIRST_GAMUT_REMAP_MODE, MPCC_MCM_FIRST_GAMUT_REMAP_MODE, mask_sh), \ + SF(MPCC_MCM0_MPCC_MCM_FIRST_GAMUT_REMAP_MODE, MPCC_MCM_FIRST_GAMUT_REMAP_MODE_CURRENT, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A, MPCC_MCM_FIRST_GAMUT_REMAP_C11_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A, MPCC_MCM_FIRST_GAMUT_REMAP_C12_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_A, MPCC_MCM_FIRST_GAMUT_REMAP_C13_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_A, MPCC_MCM_FIRST_GAMUT_REMAP_C14_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_A, MPCC_MCM_FIRST_GAMUT_REMAP_C21_A, mask_sh), \ + 
SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_A, MPCC_MCM_FIRST_GAMUT_REMAP_C22_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_A, MPCC_MCM_FIRST_GAMUT_REMAP_C23_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_A, MPCC_MCM_FIRST_GAMUT_REMAP_C24_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_A, MPCC_MCM_FIRST_GAMUT_REMAP_C31_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_A, MPCC_MCM_FIRST_GAMUT_REMAP_C32_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A, MPCC_MCM_FIRST_GAMUT_REMAP_C33_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A, MPCC_MCM_FIRST_GAMUT_REMAP_C34_A, mask_sh), \ + SF(MPCC_MCM0_MPCC_MCM_SECOND_GAMUT_REMAP_COEF_FORMAT, MPCC_MCM_SECOND_GAMUT_REMAP_COEF_FORMAT, mask_sh), \ + SF(MPCC_MCM0_MPCC_MCM_SECOND_GAMUT_REMAP_MODE, MPCC_MCM_SECOND_GAMUT_REMAP_MODE, mask_sh), \ + SF(MPCC_MCM0_MPCC_MCM_SECOND_GAMUT_REMAP_MODE, MPCC_MCM_SECOND_GAMUT_REMAP_MODE_CURRENT, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A, MPCC_MCM_SECOND_GAMUT_REMAP_C11_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A, MPCC_MCM_SECOND_GAMUT_REMAP_C12_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_A, MPCC_MCM_SECOND_GAMUT_REMAP_C13_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_A, MPCC_MCM_SECOND_GAMUT_REMAP_C14_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_A, MPCC_MCM_SECOND_GAMUT_REMAP_C21_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_A, MPCC_MCM_SECOND_GAMUT_REMAP_C22_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_A, MPCC_MCM_SECOND_GAMUT_REMAP_C23_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_A, MPCC_MCM_SECOND_GAMUT_REMAP_C24_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_A, MPCC_MCM_SECOND_GAMUT_REMAP_C31_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_A, MPCC_MCM_SECOND_GAMUT_REMAP_C32_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A, MPCC_MCM_SECOND_GAMUT_REMAP_C33_A, mask_sh), \ + SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A, MPCC_MCM_SECOND_GAMUT_REMAP_C34_A, mask_sh), \ + SF(MPCC_MCM0_MPCC_MCM_3DLUT_FAST_LOAD_SELECT, MPCC_MCM_3DLUT_FL_SEL, mask_sh), \ + SF(MPCC_MCM0_MPCC_MCM_3DLUT_FAST_LOAD_STATUS, MPCC_MCM_3DLUT_FL_DONE, mask_sh), \ + SF(MPCC_MCM0_MPCC_MCM_3DLUT_FAST_LOAD_STATUS, MPCC_MCM_3DLUT_FL_SOFT_UNDERFLOW, mask_sh), \ + SF(MPCC_MCM0_MPCC_MCM_3DLUT_FAST_LOAD_STATUS, MPCC_MCM_3DLUT_FL_HARD_UNDERFLOW, mask_sh) + + +#define MPC_REG_LIST_DCN4_01_RI(inst) \ + MPC_REG_LIST_DCN3_2_RI(inst),\ + SRII(MPCC_MCM_FIRST_GAMUT_REMAP_COEF_FORMAT, MPCC_MCM, inst),\ + SRII(MPCC_MCM_FIRST_GAMUT_REMAP_MODE, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_A, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_A, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_A, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_A, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_B, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_B, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_B, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_B, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_B, MPCC_MCM, inst),\ + SRII(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SECOND_GAMUT_REMAP_COEF_FORMAT, MPCC_MCM, inst), \ 
+ SRII(MPCC_MCM_SECOND_GAMUT_REMAP_MODE, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_A, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_A, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_A, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_A, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_B, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_B, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_B, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_B, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_B, MPCC_MCM, inst), \ + SRII(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_B, MPCC_MCM, inst), \ + SRII(MPCC_MCM_3DLUT_FAST_LOAD_STATUS, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_FAST_LOAD_SELECT, MPCC_MCM, inst) + +#define MPC_REG_FIELD_LIST_DCN4_01(type)\ + MPC_REG_FIELD_LIST_DCN3_0(type);\ + MPC_REG_FIELD_LIST_DCN32(type);\ + type MPCC_MCM_FIRST_GAMUT_REMAP_COEF_FORMAT;\ + type MPCC_MCM_FIRST_GAMUT_REMAP_MODE;\ + type MPCC_MCM_FIRST_GAMUT_REMAP_MODE_CURRENT;\ + type MPCC_MCM_FIRST_GAMUT_REMAP_C11_A;\ + type MPCC_MCM_FIRST_GAMUT_REMAP_C12_A;\ + type MPCC_MCM_FIRST_GAMUT_REMAP_C13_A;\ + type MPCC_MCM_FIRST_GAMUT_REMAP_C14_A;\ + type MPCC_MCM_FIRST_GAMUT_REMAP_C21_A;\ + type MPCC_MCM_FIRST_GAMUT_REMAP_C22_A;\ + type MPCC_MCM_FIRST_GAMUT_REMAP_C23_A;\ + type MPCC_MCM_FIRST_GAMUT_REMAP_C24_A;\ + type MPCC_MCM_FIRST_GAMUT_REMAP_C31_A; \ + type MPCC_MCM_FIRST_GAMUT_REMAP_C32_A; \ + type MPCC_MCM_FIRST_GAMUT_REMAP_C33_A; \ + type MPCC_MCM_FIRST_GAMUT_REMAP_C34_A; \ + type MPCC_MCM_SECOND_GAMUT_REMAP_COEF_FORMAT;\ + type MPCC_MCM_SECOND_GAMUT_REMAP_MODE;\ + type MPCC_MCM_SECOND_GAMUT_REMAP_MODE_CURRENT;\ + type MPCC_MCM_SECOND_GAMUT_REMAP_C11_A;\ + type MPCC_MCM_SECOND_GAMUT_REMAP_C12_A;\ + type MPCC_MCM_SECOND_GAMUT_REMAP_C13_A;\ + type MPCC_MCM_SECOND_GAMUT_REMAP_C14_A;\ + type MPCC_MCM_SECOND_GAMUT_REMAP_C21_A;\ + type MPCC_MCM_SECOND_GAMUT_REMAP_C22_A;\ + type MPCC_MCM_SECOND_GAMUT_REMAP_C23_A;\ + type MPCC_MCM_SECOND_GAMUT_REMAP_C24_A;\ + type MPCC_MCM_SECOND_GAMUT_REMAP_C31_A; \ + type MPCC_MCM_SECOND_GAMUT_REMAP_C32_A; \ + type MPCC_MCM_SECOND_GAMUT_REMAP_C33_A; \ + type MPCC_MCM_SECOND_GAMUT_REMAP_C34_A; \ + type MPCC_MCM_3DLUT_FL_SEL;\ + type MPCC_MCM_3DLUT_FL_DONE;\ + type MPCC_MCM_3DLUT_FL_SOFT_UNDERFLOW;\ + type MPCC_MCM_3DLUT_FL_HARD_UNDERFLOW + +struct dcn401_mpc_shift { + MPC_REG_FIELD_LIST_DCN4_01(uint8_t); +}; + +struct dcn401_mpc_mask { + MPC_REG_FIELD_LIST_DCN4_01(uint32_t); +}; + +struct dcn401_mpc_registers { + MPC_REG_VARIABLE_LIST_DCN4_01; +}; + +struct dcn401_mpc { + struct mpc base; + + int mpcc_in_use_mask; + int num_mpcc; + const struct dcn401_mpc_registers *mpc_regs; + const struct dcn401_mpc_shift *mpc_shift; + const struct dcn401_mpc_mask *mpc_mask; + int num_rmu; +}; +void dcn401_mpc_construct(struct dcn401_mpc *mpc401, + struct dc_context *ctx, + const struct dcn401_mpc_registers *mpc_regs, + const struct dcn401_mpc_shift *mpc_shift, + const struct dcn401_mpc_mask *mpc_mask, + int num_mpcc, + int num_rmu); + +void mpc401_set_movable_cm_location(struct mpc *mpc, enum mpcc_movable_cm_location location, int mpcc_id); +void mpc401_populate_lut(struct mpc *mpc, const enum MCM_LUT_ID id, const union mcm_lut_params params, + bool lut_bank_a, int mpcc_id); + +void mpc401_program_lut_mode( + struct mpc *mpc, + const enum MCM_LUT_ID id, + const enum 
MCM_LUT_XABLE xable, + bool lut_bank_a, + int mpcc_id); + +void mpc401_program_lut_read_write_control( + struct mpc *mpc, + const enum MCM_LUT_ID id, + bool lut_bank_a, + int mpcc_id); + +void mpc401_set_gamut_remap( + struct mpc *mpc, + int mpcc_id, + const struct mpc_grph_gamut_adjustment *adjust); + +void mpc401_get_gamut_remap( + struct mpc *mpc, + int mpcc_id, + struct mpc_grph_gamut_adjustment *adjust); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.c new file mode 100644 index 000000000000..4fbecb5ff349 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dcn401_fpu.h" +#include "dcn401/dcn401_resource.h" +// We need this includes for WATERMARKS_* defines +#include "clk_mgr/dcn401/dcn401_smu14_driver_if.h" +#include "link.h" + +#define DC_LOGGER_INIT(logger) + +void dcn401_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) +{ + /* defaults */ + double pstate_latency_us = clk_mgr->ctx->dc->dml.soc.dram_clock_change_latency_us; + double fclk_change_latency_us = clk_mgr->ctx->dc->dml.soc.fclk_change_latency_us; + double sr_exit_time_us = clk_mgr->ctx->dc->dml.soc.sr_exit_time_us; + double sr_enter_plus_exit_time_us = clk_mgr->ctx->dc->dml.soc.sr_enter_plus_exit_time_us; + /* For min clocks use as reported by PM FW and report those as min */ + uint16_t min_uclk_mhz = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz; + uint16_t min_dcfclk_mhz = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + uint16_t setb_min_uclk_mhz = min_uclk_mhz; + uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz; + + dc_assert_fp_enabled(); + + /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */ + if (dcfclk_mhz_for_the_second_state) + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state; + else + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + + if (clk_mgr->bw_params->clk_table.entries[2].memclk_mhz) + setb_min_uclk_mhz = clk_mgr->bw_params->clk_table.entries[2].memclk_mhz; + + /* Set A - Normal - default values */ + clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */ + clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; + 
clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ + /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ + if (clk_mgr->ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { + clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; + clk_mgr->bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz * 16; + clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50; + clk_mgr->bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[1].memclk_mhz * 16; + clk_mgr->bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; + clk_mgr->bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[2].memclk_mhz * 16; + clk_mgr->bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; + clk_mgr->bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[3].memclk_mhz * 16; + clk_mgr->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; + } + /* Set D - MALL - SR enter and exit time specific to MALL, TBD after bringup or later phase for now use DRAM values / 2 */ + /* For MALL DRAM clock change latency is N/A, for watermark calculations use lowest value dummy P-state latency */ + clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = clk_mgr->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us / 2; // TBD + clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us / 2; // TBD + clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; +
clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; +} + +/* + * dcn401_update_bw_bounding_box + * + * This would override some dcn4_01 ip_or_soc initial parameters hardcoded from + * spreadsheet with actual values as per dGPU SKU: + * - with passed few options from dc->config + * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might + * need to get it from PM FW) + * - with passed latency values (passed in ns units) in dc-> bb override for + * debugging purposes + * - with passed latencies from VBIOS (in 100_ns units) if available for + * certain dGPU SKU + * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU + * of the same ASIC) + * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM + * FW for different clocks (which might differ for certain dGPU SKU of the + * same ASIC) + */ +void dcn401_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + + /* Override from passed dc->bb_overrides if available*/ + if (dc->bb_overrides.sr_exit_time_ns) + dc->dml2_options.bbox_overrides.sr_exit_latency_us = + dc->bb_overrides.sr_exit_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_time_ns) + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + + if (dc->bb_overrides.urgent_latency_ns) + dc->dml2_options.bbox_overrides.urgent_latency_us = + dc->bb_overrides.urgent_latency_ns / 1000.0; + + if (dc->bb_overrides.dram_clock_change_latency_ns) + dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + + if (dc->bb_overrides.fclk_clock_change_latency_ns) + dc->dml2_options.bbox_overrides.fclk_change_latency_us = + dc->bb_overrides.fclk_clock_change_latency_ns / 1000; + + /* Override from VBIOS if VBIOS bb_info available */ + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + if (bb_info.dram_clock_change_latency_100ns > 0) + dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = + bb_info.dram_clock_change_latency_100ns * 10; + + if (bb_info.dram_sr_enter_exit_latency_100ns > 0) + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = + bb_info.dram_sr_enter_exit_latency_100ns * 10; + + if (bb_info.dram_sr_exit_latency_100ns > 0) + dc->dml2_options.bbox_overrides.sr_exit_latency_us = + bb_info.dram_sr_exit_latency_100ns * 10; + } + } + + /* Override from VBIOS for num_chan */ + if (dc->ctx->dc_bios->vram_info.num_chans) { + dc->dml2_options.bbox_overrides.dram_num_chan = + dc->ctx->dc_bios->vram_info.num_chans; + + } + + if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) + dc->dml2_options.bbox_overrides.dram_chanel_width_bytes = + dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; + + dc->dml2_options.bbox_overrides.disp_pll_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml2_options.bbox_overrides.xtalclk_mhz = dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000.0; + dc->dml2_options.bbox_overrides.dchub_refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 
1000.0; + dc->dml2_options.bbox_overrides.dprefclk_mhz = dc->clk_mgr->dprefclk_khz / 1000.0; + + if (dc->clk_mgr->bw_params->clk_table.num_entries > 1) { + unsigned int i = 0; + + dc->dml2_options.bbox_overrides.clks_table.num_states = dc->clk_mgr->bw_params->clk_table.num_entries; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; + + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels = + dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dppclk_levels; + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz) + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz; + } + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz) + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz; + } + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz) + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz; + } + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz) + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz; + } + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz) + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz; + } + + for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; i++) { + if (dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz) { + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz = + dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz; + } + } + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.h new file mode 100644 index 000000000000..329f1788843c --- /dev/null +++ 
b/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DCN401_FPU_H__ +#define __DCN401_FPU_H__ + +#include "clk_mgr.h" + +void dcn401_build_wm_range_table_fpu(struct clk_mgr *clk_mgr); + +void dcn401_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c new file mode 100644 index 000000000000..b3602f897872 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -0,0 +1,1155 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml21_wrapper.h" +#include "dml2_core_dcn4_calcs.h" +#include "dml2_internal_shared_types.h" +#include "dml2_internal_types.h" +#include "dml21_utils.h" +#include "dml21_translation_helper.h" +#include "bounding_boxes/dcn4_soc_bb.h" +#include "bounding_boxes/dcn3_soc_bb.h" + +static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_init, + const struct dml2_configuration_options *config, + const struct dc *in_dc) +{ + const struct dml2_soc_bb *soc_bb; + const struct dml2_soc_qos_parameters *qos_params; + + switch (in_dc->ctx->dce_version) { + case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete. + soc_bb = &dml2_socbb_dcn31; + qos_params = &dml_dcn31_soc_qos_params; + break; + case DCN_VERSION_4_01: + default: + soc_bb = &dml2_socbb_dcn401; + qos_params = &dml_dcn401_soc_qos_params; + } + + /* patch soc bb */ + memcpy(&dml_init->soc_bb, soc_bb, sizeof(struct dml2_soc_bb)); + + /* patch qos params */ + memcpy(&dml_init->soc_bb.qos_parameters, qos_params, sizeof(struct dml2_soc_qos_parameters)); +} + +static void dml21_external_socbb_params(struct dml2_initialize_instance_in_out *dml_init, + const struct dml2_configuration_options *config) +{ + memcpy(&dml_init->soc_bb, &config->external_socbb_ip_params->soc_bb, sizeof(struct dml2_soc_bb)); +} + +static void dml21_external_ip_params(struct dml2_initialize_instance_in_out *dml_init, + const struct dml2_configuration_options *config) +{ + memcpy(&dml_init->ip_caps, &config->external_socbb_ip_params->ip_params, sizeof(struct dml2_ip_capabilities)); +} + +static void dml21_init_ip_params(struct dml2_initialize_instance_in_out *dml_init, + const struct dml2_configuration_options *config, + const struct dc *in_dc) +{ + const struct dml2_ip_capabilities *ip_caps; + + switch (in_dc->ctx->dce_version) { + case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete. 
+ ip_caps = &dml2_dcn31_max_ip_caps; + break; + case DCN_VERSION_4_01: + default: + ip_caps = &dml2_dcn401_max_ip_caps; + } + + memcpy(&dml_init->ip_caps, ip_caps, sizeof(struct dml2_ip_capabilities)); +} + +void dml21_initialize_soc_bb_params(struct dml2_initialize_instance_in_out *dml_init, + const struct dml2_configuration_options *config, + const struct dc *in_dc) +{ + if (config->use_native_soc_bb_construction) + dml21_init_socbb_params(dml_init, config, in_dc); + else + dml21_external_socbb_params(dml_init, config); +} + +void dml21_initialize_ip_params(struct dml2_initialize_instance_in_out *dml_init, + const struct dml2_configuration_options *config, + const struct dc *in_dc) +{ + if (config->use_native_soc_bb_construction) + dml21_init_ip_params(dml_init, config, in_dc); + else + dml21_external_ip_params(dml_init, config); +} + +void dml21_apply_soc_bb_overrides(struct dml2_initialize_instance_in_out *dml_init, + const struct dml2_configuration_options *config, const struct dc *in_dc) +{ + int i; + + const struct clk_bw_params *dc_bw_params = in_dc->clk_mgr->bw_params; + const struct clk_limit_table *dc_clk_table = &dc_bw_params->clk_table; + struct dml2_soc_bb *dml_soc_bb = &dml_init->soc_bb; + struct dml2_soc_state_table *dml_clk_table = &dml_soc_bb->clk_table; + + /* override clocks if smu is present */ + if (in_dc->clk_mgr && + in_dc->clk_mgr->funcs->is_smu_present && + in_dc->clk_mgr->funcs->is_smu_present(in_dc->clk_mgr)) { + /* dcfclk */ + if (dc_clk_table->num_entries_per_clk.num_dcfclk_levels) { + dml_clk_table->dcfclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dcfclk_levels; + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + if (i < dml_clk_table->dcfclk.num_clk_values) { + if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dcfclk_mhz && + dc_clk_table->entries[i].dcfclk_mhz > dc_bw_params->dc_mode_limit.dcfclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dcfclk_mhz < dc_bw_params->dc_mode_limit.dcfclk_mhz) { + dml_clk_table->dcfclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dcfclk_mhz * 1000; + dml_clk_table->dcfclk.num_clk_values = i + 1; + } else { + dml_clk_table->dcfclk.clk_values_khz[i] = 0; + dml_clk_table->dcfclk.num_clk_values = i; + } + } else { + dml_clk_table->dcfclk.clk_values_khz[i] = dc_clk_table->entries[i].dcfclk_mhz * 1000; + } + } else { + dml_clk_table->dcfclk.clk_values_khz[i] = 0; + } + } + } + + /* fclk */ + if (dc_clk_table->num_entries_per_clk.num_fclk_levels) { + dml_clk_table->fclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_fclk_levels; + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + if (i < dml_clk_table->fclk.num_clk_values) { + if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.fclk_mhz && + dc_clk_table->entries[i].fclk_mhz > dc_bw_params->dc_mode_limit.fclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].fclk_mhz < dc_bw_params->dc_mode_limit.fclk_mhz) { + dml_clk_table->fclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.fclk_mhz * 1000; + dml_clk_table->fclk.num_clk_values = i + 1; + } else { + dml_clk_table->fclk.clk_values_khz[i] = 0; + dml_clk_table->fclk.num_clk_values = i; + } + } else { + dml_clk_table->fclk.clk_values_khz[i] = dc_clk_table->entries[i].fclk_mhz * 1000; + } + } else { + dml_clk_table->fclk.clk_values_khz[i] = 0; + } + } + } + + /* uclk */ + if (dc_clk_table->num_entries_per_clk.num_memclk_levels) { + dml_clk_table->uclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_memclk_levels; + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + if (i < 
dml_clk_table->uclk.num_clk_values) { + if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.memclk_mhz && + dc_clk_table->entries[i].memclk_mhz > dc_bw_params->dc_mode_limit.memclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].memclk_mhz < dc_bw_params->dc_mode_limit.memclk_mhz) { + dml_clk_table->uclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.memclk_mhz * 1000; + dml_clk_table->uclk.num_clk_values = i + 1; + } else { + dml_clk_table->uclk.clk_values_khz[i] = 0; + dml_clk_table->uclk.num_clk_values = i; + } + } else { + dml_clk_table->uclk.clk_values_khz[i] = dc_clk_table->entries[i].memclk_mhz * 1000; + } + } else { + dml_clk_table->uclk.clk_values_khz[i] = 0; + } + } + } + + /* dispclk */ + if (dc_clk_table->num_entries_per_clk.num_dispclk_levels) { + dml_clk_table->dispclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dispclk_levels; + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + if (i < dml_clk_table->dispclk.num_clk_values) { + if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dispclk_mhz && + dc_clk_table->entries[i].dispclk_mhz > dc_bw_params->dc_mode_limit.dispclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dispclk_mhz < dc_bw_params->dc_mode_limit.dispclk_mhz) { + dml_clk_table->dispclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dispclk_mhz * 1000; + dml_clk_table->dispclk.num_clk_values = i + 1; + } else { + dml_clk_table->dispclk.clk_values_khz[i] = 0; + dml_clk_table->dispclk.num_clk_values = i; + } + } else { + dml_clk_table->dispclk.clk_values_khz[i] = dc_clk_table->entries[i].dispclk_mhz * 1000; + } + } else { + dml_clk_table->dispclk.clk_values_khz[i] = 0; + } + } + } + + /* dppclk */ + if (dc_clk_table->num_entries_per_clk.num_dppclk_levels) { + dml_clk_table->dppclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dppclk_levels; + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + if (i < dml_clk_table->dppclk.num_clk_values) { + if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dppclk_mhz && + dc_clk_table->entries[i].dppclk_mhz > dc_bw_params->dc_mode_limit.dppclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dppclk_mhz < dc_bw_params->dc_mode_limit.dppclk_mhz) { + dml_clk_table->dppclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dppclk_mhz * 1000; + dml_clk_table->dppclk.num_clk_values = i + 1; + } else { + dml_clk_table->dppclk.clk_values_khz[i] = 0; + dml_clk_table->dppclk.num_clk_values = i; + } + } else { + dml_clk_table->dppclk.clk_values_khz[i] = dc_clk_table->entries[i].dppclk_mhz * 1000; + } + } else { + dml_clk_table->dppclk.clk_values_khz[i] = 0; + } + } + } + + /* dtbclk */ + if (dc_clk_table->num_entries_per_clk.num_dtbclk_levels) { + dml_clk_table->dtbclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dtbclk_levels; + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + if (i < dml_clk_table->dtbclk.num_clk_values) { + if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dtbclk_mhz && + dc_clk_table->entries[i].dtbclk_mhz > dc_bw_params->dc_mode_limit.dtbclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dtbclk_mhz < dc_bw_params->dc_mode_limit.dtbclk_mhz) { + dml_clk_table->dtbclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dtbclk_mhz * 1000; + dml_clk_table->dtbclk.num_clk_values = i + 1; + } else { + dml_clk_table->dtbclk.clk_values_khz[i] = 0; + dml_clk_table->dtbclk.num_clk_values = i; + } + } else { + dml_clk_table->dtbclk.clk_values_khz[i] = dc_clk_table->entries[i].dtbclk_mhz * 1000; + } + } else { + dml_clk_table->dtbclk.clk_values_khz[i] = 0; + 
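/*
 * Editor's note (illustrative annotation, not part of this patch): every per-clock
 * block in this function applies the same DC-mode capping pattern when
 * use_clock_dc_limits is set. For a hypothetical DTBCLK table of {600, 900, 1200} MHz
 * with a DC-mode limit of 1000 MHz, level 2 exceeds the limit while level 1 does not,
 * so slot 2 is written as 1000 MHz (1000000 kHz) and num_clk_values stays 3.
 * If the limit were 800 MHz instead, slot 1 would be clamped to 800 MHz,
 * num_clk_values would shrink to 2, and the remaining slots would be zeroed.
 */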
} + } + } + + /* socclk */ + if (dc_clk_table->num_entries_per_clk.num_socclk_levels) { + dml_clk_table->socclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_socclk_levels; + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + if (i < dml_clk_table->socclk.num_clk_values) { + if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.socclk_mhz && + dc_clk_table->entries[i].socclk_mhz > dc_bw_params->dc_mode_limit.socclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].socclk_mhz < dc_bw_params->dc_mode_limit.socclk_mhz) { + dml_clk_table->socclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.socclk_mhz * 1000; + dml_clk_table->socclk.num_clk_values = i + 1; + } else { + dml_clk_table->socclk.clk_values_khz[i] = 0; + dml_clk_table->socclk.num_clk_values = i; + } + } else { + dml_clk_table->socclk.clk_values_khz[i] = dc_clk_table->entries[i].socclk_mhz * 1000; + } + } else { + dml_clk_table->socclk.clk_values_khz[i] = 0; + } + } + } + + /* do not override phyclks for now */ + /* phyclk */ + // dml_clk_table->phyclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_phyclk_levels; + // for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + // dml_clk_table->phyclk.clk_values_khz[i] = dc_clk_table->entries[i].phyclk_mhz * 1000; + // } + + /* phyclk_d18 */ + // dml_clk_table->phyclk_d18.num_clk_values = dc_clk_table->num_entries_per_clk.num_phyclk_d18_levels; + // for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + // dml_clk_table->phyclk_d18.clk_values_khz[i] = dc_clk_table->entries[i].phyclk_d18_mhz * 1000; + // } + + /* phyclk_d32 */ + // dml_clk_table->phyclk_d32.num_clk_values = dc_clk_table->num_entries_per_clk.num_phyclk_d32_levels; + // for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + // dml_clk_table->phyclk_d32.clk_values_khz[i] = dc_clk_table->entries[i].phyclk_d32_mhz * 1000; + // } + } + + dml_soc_bb->dchub_refclk_mhz = in_dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; + dml_soc_bb->dprefclk_mhz = in_dc->clk_mgr->dprefclk_khz / 1000; + dml_soc_bb->xtalclk_mhz = in_dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000; + dml_soc_bb->dispclk_dppclk_vco_speed_mhz = in_dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + /* override bounding box parameters from VBIOS */ + if (in_dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns > 0) + dml_soc_bb->power_management_parameters.dram_clk_change_blackout_us = + (in_dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns + 9) / 10; + + if (in_dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns > 0) + dml_soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us = + (in_dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns + 9) / 10; + + if (in_dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns > 0) + dml_soc_bb->power_management_parameters.stutter_exit_latency_us = + (in_dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns + 9) / 10; + + if (in_dc->ctx->dc_bios->vram_info.num_chans) { + dml_clk_table->dram_config.channel_count = in_dc->ctx->dc_bios->vram_info.num_chans; + //dml_soc_bb->mall_allocated_for_dcn_mbytes = TODO; + } + + if (in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) { + dml_clk_table->dram_config.channel_width_bytes = in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; + } + + /* override bounding box parameters from DC config */ + if (in_dc->bb_overrides.sr_exit_time_ns) { + dml_soc_bb->power_management_parameters.stutter_exit_latency_us = + in_dc->bb_overrides.sr_exit_time_ns / 1000.0; + } + + if (in_dc->bb_overrides.sr_enter_plus_exit_time_ns) { +
dml_soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us = + in_dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + } + + if (in_dc->bb_overrides.dram_clock_change_latency_ns) { + dml_soc_bb->power_management_parameters.dram_clk_change_blackout_us = + in_dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + } + + if (in_dc->bb_overrides.fclk_clock_change_latency_ns) { + dml_soc_bb->power_management_parameters.fclk_change_blackout_us = + in_dc->bb_overrides.fclk_clock_change_latency_ns / 1000.0; + } + + //TODO + // if (in_dc->bb_overrides.dummy_clock_change_latency_ns) { + // dml_soc_bb->power_management_parameters.dram_clk_change_blackout_us = + // in_dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + // } +} + +static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing, + struct dc_stream_state *stream) +{ + unsigned int hblank_start, vblank_start; + + timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; + timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; + timing->h_front_porch = stream->timing.h_front_porch; + timing->v_front_porch = stream->timing.v_front_porch; + timing->pixel_clock_khz = stream->timing.pix_clk_100hz / 10; + if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + timing->pixel_clock_khz *= 2; + timing->h_total = stream->timing.h_total; + timing->v_total = stream->timing.v_total; + timing->h_sync_width = stream->timing.h_sync_width; + timing->interlaced = stream->timing.flags.INTERLACE; + + hblank_start = stream->timing.h_total - stream->timing.h_front_porch; + + timing->h_blank_end = hblank_start - stream->timing.h_addressable + - stream->timing.h_border_left - stream->timing.h_border_right; + + if (hblank_start < stream->timing.h_addressable) + timing->h_blank_end = 0; + + vblank_start = stream->timing.v_total - stream->timing.v_front_porch; + + timing->v_blank_end = vblank_start - stream->timing.v_addressable + - stream->timing.v_border_top - stream->timing.v_border_bottom; + + timing->drr_config.enabled = stream->ignore_msa_timing_param; + timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz; + timing->drr_config.drr_active_variable = stream->vrr_active_variable; + timing->drr_config.drr_active_fixed = stream->vrr_active_fixed; + timing->drr_config.disallowed = !stream->allow_freesync; + //timing->drr_config.max_instant_vtotal_delta = timing-><drr no flicker delta lum>; + + if (stream->timing.flags.DSC) { + timing->dsc.enable = dml2_dsc_enable; + timing->dsc.overrides.num_slices = stream->timing.dsc_cfg.num_slices_h; + timing->dsc.dsc_compressed_bpp_x16 = stream->timing.dsc_cfg.bits_per_pixel; + } else + timing->dsc.enable = dml2_dsc_disable; + + switch (stream->timing.display_color_depth) { + case COLOR_DEPTH_666: + timing->bpc = 6; + break; + case COLOR_DEPTH_888: + timing->bpc = 8; + break; + case COLOR_DEPTH_101010: + timing->bpc = 10; + break; + case COLOR_DEPTH_121212: + timing->bpc = 12; + break; + case COLOR_DEPTH_141414: + timing->bpc = 14; + break; + case COLOR_DEPTH_161616: + timing->bpc = 16; + break; + case COLOR_DEPTH_999: + timing->bpc = 9; + break; + case COLOR_DEPTH_111111: + timing->bpc = 11; + break; + default: + timing->bpc = 8; + break; + } + + timing->vblank_nom = timing->v_total - timing->v_active; +} + +static void populate_dml21_output_config_from_stream_state(struct dml2_link_output_cfg *output, + struct dc_stream_state 
*stream, const struct pipe_ctx *pipe) +{ + output->output_dp_lane_count = 4; + + switch (stream->signal) { + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_DISPLAY_PORT: + output->output_encoder = dml2_dp; + if (check_dp2p0_output_encoder(pipe)) + output->output_encoder = dml2_dp2p0; + break; + case SIGNAL_TYPE_EDP: + output->output_encoder = dml2_edp; + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + output->output_encoder = dml2_hdmi; + break; + default: + output->output_encoder = dml2_dp; + } + + switch (stream->timing.pixel_encoding) { + case PIXEL_ENCODING_RGB: + case PIXEL_ENCODING_YCBCR444: + output->output_format = dml2_444; + break; + case PIXEL_ENCODING_YCBCR420: + output->output_format = dml2_420; + break; + case PIXEL_ENCODING_YCBCR422: + if (stream->timing.flags.DSC && !stream->timing.dsc_cfg.ycbcr422_simple) + output->output_format = dml2_n422; + else + output->output_format = dml2_s422; + break; + default: + output->output_format = dml2_444; + break; + } + + switch (stream->signal) { + case SIGNAL_TYPE_NONE: + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_LVDS: + case SIGNAL_TYPE_RGB: + case SIGNAL_TYPE_DISPLAY_PORT: + case SIGNAL_TYPE_DISPLAY_PORT_MST: + case SIGNAL_TYPE_EDP: + case SIGNAL_TYPE_VIRTUAL: + default: + output->output_dp_link_rate = dml2_dp_rate_na; + break; + } + + output->audio_sample_layout = stream->audio_info.modes->sample_size; + output->audio_sample_rate = stream->audio_info.modes->max_bit_rate; + output->output_disabled = true; + + //TODO : New to DML2.1. How do we populate this ? + // output->validate_output +} + +static void populate_dml21_stream_overrides_from_stream_state( + struct dml2_stream_parameters *stream_desc, + struct dc_stream_state *stream) +{ + switch (stream->debug.force_odm_combine_segments) { + case 0: + stream_desc->overrides.odm_mode = dml2_odm_mode_auto; + break; + case 1: + stream_desc->overrides.odm_mode = dml2_odm_mode_bypass; + break; + case 2: + stream_desc->overrides.odm_mode = dml2_odm_mode_combine_2to1; + break; + case 3: + stream_desc->overrides.odm_mode = dml2_odm_mode_combine_3to1; + break; + case 4: + stream_desc->overrides.odm_mode = dml2_odm_mode_combine_4to1; + break; + default: + stream_desc->overrides.odm_mode = dml2_odm_mode_auto; + break; + } + if (!stream->ctx->dc->debug.enable_single_display_2to1_odm_policy) + stream_desc->overrides.disable_dynamic_odm = true; + stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp; +} + +static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode) +{ + enum dml2_swizzle_mode dml2_mode = dml2_sw_linear; + + switch (addr3_mode) { + case DC_ADDR3_SW_LINEAR: + dml2_mode = dml2_sw_linear; + break; + case DC_ADDR3_SW_256B_2D: + dml2_mode = dml2_sw_256b_2d; + break; + case DC_ADDR3_SW_4KB_2D: + dml2_mode = dml2_sw_4kb_2d; + break; + case DC_ADDR3_SW_64KB_2D: + dml2_mode = dml2_sw_64kb_2d; + break; + case DC_ADDR3_SW_256KB_2D: + dml2_mode = dml2_sw_256kb_2d; + break; + default: + /* invalid swizzle mode for DML2.1 */ + ASSERT(false); + dml2_mode = dml2_sw_linear; + } + + return dml2_mode; +} + +static enum dml2_swizzle_mode gfx9_to_dml2_swizzle_mode(enum swizzle_mode_values gfx9_mode) +{ + enum dml2_swizzle_mode dml2_mode = dml2_sw_64kb_2d; + + switch (gfx9_mode) { + case DC_SW_LINEAR: + dml2_mode = dml2_sw_linear; + break; + case DC_SW_256_D: + case DC_SW_256_R: + dml2_mode 
= dml2_sw_256b_2d; + break; + case DC_SW_4KB_D: + case DC_SW_4KB_R: + case DC_SW_4KB_R_X: + dml2_mode = dml2_sw_4kb_2d; + break; + case DC_SW_64KB_D: + case DC_SW_64KB_D_X: + case DC_SW_64KB_R: + case DC_SW_64KB_R_X: + dml2_mode = dml2_sw_64kb_2d; + break; + case DC_SW_256B_S: + case DC_SW_4KB_S: + case DC_SW_64KB_S: + case DC_SW_VAR_S: + case DC_SW_VAR_D: + case DC_SW_VAR_R: + case DC_SW_64KB_S_T: + case DC_SW_64KB_D_T: + case DC_SW_4KB_S_X: + case DC_SW_4KB_D_X: + case DC_SW_64KB_S_X: + case DC_SW_VAR_S_X: + case DC_SW_VAR_D_X: + case DC_SW_VAR_R_X: + default: + /* + * invalid swizzle mode for DML2.1. This could happen because + * DML21 is not intended to be used by N-1 in production. To + * properly filter out unsupported swizzle modes, we will need + * to fix capability reporting when DML2.1 is used for N-1 in + * dc. So DML will only receive DML21 supported swizzle modes. + * This implementation is not added and has a low value because + * the supported swizzle modes should already cover most of our + * N-1 test cases. + */ + return dml2_sw_64kb_2d; + } + + return dml2_mode; +} + +static void populate_dml21_dummy_surface_cfg(struct dml2_surface_cfg *surface, const struct dc_stream_state *stream) +{ + surface->plane0.width = stream->timing.h_addressable; + surface->plane0.height = stream->timing.v_addressable; + surface->plane1.width = stream->timing.h_addressable; + surface->plane1.height = stream->timing.v_addressable; + surface->plane0.pitch = ((surface->plane0.width + 127) / 128) * 128; + surface->plane1.pitch = 0; + surface->dcc.enable = false; + surface->dcc.informative.dcc_rate_plane0 = 1.0; + surface->dcc.informative.dcc_rate_plane1 = 1.0; + surface->dcc.informative.fraction_of_zero_size_request_plane0 = 0; + surface->dcc.informative.fraction_of_zero_size_request_plane1 = 0; + surface->tiling = dml2_sw_64kb_2d; +} + +static void populate_dml21_dummy_plane_cfg(struct dml2_plane_parameters *plane, const struct dc_stream_state *stream) +{ + unsigned int width, height; + + if (stream->timing.h_addressable > 3840) + width = 3840; + else + width = stream->timing.h_addressable; // 4K max + + if (stream->timing.v_addressable > 2160) + height = 2160; + else + height = stream->timing.v_addressable; // 4K max + + plane->cursor.cursor_bpp = 32; + + plane->cursor.cursor_width = 256; + plane->cursor.num_cursors = 1; + + plane->composition.viewport.plane0.width = width; + plane->composition.viewport.plane0.height = height; + plane->composition.viewport.plane1.width = 0; + plane->composition.viewport.plane1.height = 0; + + plane->composition.viewport.stationary = false; + plane->composition.viewport.plane0.x_start = 0; + plane->composition.viewport.plane0.y_start = 0; + plane->composition.viewport.plane1.x_start = 0; + plane->composition.viewport.plane1.y_start = 0; + + plane->composition.scaler_info.enabled = false; + plane->composition.rotation_angle = dml2_rotation_0; + plane->composition.scaler_info.plane0.h_ratio = 1.0; + plane->composition.scaler_info.plane0.v_ratio = 1.0; + plane->composition.scaler_info.plane1.h_ratio = 0; + plane->composition.scaler_info.plane1.v_ratio = 0; + plane->composition.scaler_info.plane0.h_taps = 1; + plane->composition.scaler_info.plane0.v_taps = 1; + plane->composition.scaler_info.plane1.h_taps = 0; + plane->composition.scaler_info.plane1.v_taps = 0; + plane->composition.scaler_info.rect_out_width = width; + plane->pixel_format = dml2_444_32; + + plane->dynamic_meta_data.enable = false; + plane->overrides.gpuvm_min_page_size_kbytes = 256; +} + +static void 
populate_dml21_surface_config_from_plane_state( + const struct dc *in_dc, + struct dml2_surface_cfg *surface, + const struct dc_plane_state *plane_state) +{ + surface->plane0.pitch = plane_state->plane_size.surface_pitch; + surface->plane1.pitch = plane_state->plane_size.chroma_pitch; + surface->plane0.height = plane_state->plane_size.surface_size.height; + surface->plane0.width = plane_state->plane_size.surface_size.width; + surface->plane1.height = plane_state->plane_size.chroma_size.height; + surface->plane1.width = plane_state->plane_size.chroma_size.width; + surface->dcc.enable = plane_state->dcc.enable; + surface->dcc.informative.dcc_rate_plane0 = 1.0; + surface->dcc.informative.dcc_rate_plane1 = 1.0; + surface->dcc.informative.fraction_of_zero_size_request_plane0 = plane_state->dcc.independent_64b_blks; + surface->dcc.informative.fraction_of_zero_size_request_plane1 = plane_state->dcc.independent_64b_blks_c; + surface->dcc.plane0.pitch = plane_state->dcc.meta_pitch; + surface->dcc.plane1.pitch = plane_state->dcc.meta_pitch_c; + if (in_dc->ctx->dce_version < DCN_VERSION_4_01) { + /* needed for N-1 testing */ + surface->tiling = gfx9_to_dml2_swizzle_mode(plane_state->tiling_info.gfx9.swizzle); + } else { + surface->tiling = gfx_addr3_to_dml2_swizzle_mode(plane_state->tiling_info.gfx_addr3.swizzle); + } +} + +static const struct scaler_data *get_scaler_data_for_plane( + struct dml2_context *dml_ctx, + const struct dc_plane_state *in, + const struct dc_state *context) +{ + int i; + struct pipe_ctx *temp_pipe = &dml_ctx->v21.scratch.temp_pipe; + + memset(temp_pipe, 0, sizeof(struct pipe_ctx)); + + for (i = 0; i < MAX_PIPES; i++) { + const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state == in && !pipe->prev_odm_pipe) { + temp_pipe->stream = pipe->stream; + temp_pipe->plane_state = pipe->plane_state; + temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; + + dml_ctx->config.callbacks.build_scaling_params(temp_pipe); + break; + } + } + + ASSERT(i < MAX_PIPES); + return &temp_pipe->plane_res.scl_data; +} + +static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dml_ctx, + struct dml2_plane_parameters *plane, const struct dc_plane_state *plane_state, + const struct dc_state *context, unsigned int stream_index) +{ + const struct scaler_data *scaler_data = get_scaler_data_for_plane(dml_ctx, plane_state, context); + struct dc_stream_state *stream = context->streams[stream_index]; + + if (stream->cursor_attributes.color_format == CURSOR_MODE_MONO) + plane->cursor.cursor_bpp = 2; + else if (stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_1BIT_AND + || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA + || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { + plane->cursor.cursor_bpp = 32; + } else if (stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED + || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED) { + plane->cursor.cursor_bpp = 64; + } else + plane->cursor.cursor_bpp = 32; + + plane->cursor.cursor_width = 256; + plane->cursor.num_cursors = 1; + + switch (plane_state->format) { + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + plane->pixel_format = dml2_420_8; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + plane->pixel_format = dml2_420_10; + break; + case 
SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + plane->pixel_format = dml2_444_64; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + plane->pixel_format = dml2_444_16; + break; + case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: + plane->pixel_format = dml2_444_8; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + plane->pixel_format = dml2_rgbe_alpha; + break; + default: + plane->pixel_format = dml2_444_32; + break; + } + + plane->composition.viewport.plane0.height = scaler_data->viewport.height; + plane->composition.viewport.plane0.width = scaler_data->viewport.width; + plane->composition.viewport.plane1.height = scaler_data->viewport_c.height; + plane->composition.viewport.plane1.width = scaler_data->viewport_c.width; + plane->composition.viewport.plane0.x_start = scaler_data->viewport.x; + plane->composition.viewport.plane0.y_start = scaler_data->viewport.y; + plane->composition.viewport.plane1.x_start = scaler_data->viewport_c.x; + plane->composition.viewport.plane1.y_start = scaler_data->viewport_c.y; + plane->composition.viewport.stationary = false; + plane->composition.scaler_info.enabled = scaler_data->ratios.horz.value != dc_fixpt_one.value || + scaler_data->ratios.horz_c.value != dc_fixpt_one.value || + scaler_data->ratios.vert.value != dc_fixpt_one.value || + scaler_data->ratios.vert_c.value != dc_fixpt_one.value; + + if (!scaler_data->taps.h_taps) { + /* Above logic determines scaling should be enabled even when there are no taps for + * certain cases. Hence take corrective action and disable scaling. + */ + plane->composition.scaler_info.enabled = false; + } + + /* always_scale is only used for debug purposes, not used in production, but has to be + * maintained for certain compliances. 
*/ + if (plane_state->ctx->dc->debug.always_scale == true) { + plane->composition.scaler_info.enabled = true; + } + + if (plane->composition.scaler_info.enabled == false) { + plane->composition.scaler_info.plane0.h_ratio = 1.0; + plane->composition.scaler_info.plane0.v_ratio = 1.0; + plane->composition.scaler_info.plane1.h_ratio = 1.0; + plane->composition.scaler_info.plane1.v_ratio = 1.0; + } else { + plane->composition.scaler_info.plane0.h_ratio = (double)scaler_data->ratios.horz.value / (1ULL << 32); + plane->composition.scaler_info.plane0.v_ratio = (double)scaler_data->ratios.vert.value / (1ULL << 32); + plane->composition.scaler_info.plane1.h_ratio = (double)scaler_data->ratios.horz_c.value / (1ULL << 32); + plane->composition.scaler_info.plane1.v_ratio = (double)scaler_data->ratios.vert_c.value / (1ULL << 32); + } + + if (!scaler_data->taps.h_taps) { + plane->composition.scaler_info.plane0.h_taps = 1; + plane->composition.scaler_info.plane1.h_taps = 1; + } else { + plane->composition.scaler_info.plane0.h_taps = scaler_data->taps.h_taps; + plane->composition.scaler_info.plane1.h_taps = scaler_data->taps.h_taps_c; + } + if (!scaler_data->taps.v_taps) { + plane->composition.scaler_info.plane0.v_taps = 1; + plane->composition.scaler_info.plane1.v_taps = 1; + } else { + plane->composition.scaler_info.plane0.v_taps = scaler_data->taps.v_taps; + plane->composition.scaler_info.plane1.v_taps = scaler_data->taps.v_taps_c; + } + + plane->composition.viewport.stationary = false; + + if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) { + plane->tdlut.setup_for_tdlut = true; + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.layout) { + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: + plane->tdlut.tdlut_addressing_mode = dml2_tdlut_sw_linear; + break; + case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR: + plane->tdlut.tdlut_addressing_mode = dml2_tdlut_simple_linear; + break; + } + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.size) { + case DC_CM2_GPU_MEM_SIZE_171717: + plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube; + break; + case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: + //plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined + break; + } + } else + plane->tdlut.setup_for_tdlut = false; + + plane->dynamic_meta_data.enable = false; + plane->dynamic_meta_data.lines_before_active_required = 0; + plane->dynamic_meta_data.transmitted_bytes = 0; + + plane->composition.scaler_info.rect_out_width = plane_state->dst_rect.width; + plane->composition.rotation_angle = (enum dml2_rotation_angle) plane_state->rotation; + plane->stream_index = stream_index; + + plane->overrides.gpuvm_min_page_size_kbytes = 256; + + plane->immediate_flip = plane_state->flip_immediate; + + plane->composition.rect_out_height_spans_vactive = plane_state->dst_rect.height >= stream->timing.v_addressable; +} + +//TODO : Could be possibly moved to a common helper layer. 
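/*
 * Editor's note (illustrative annotation, not part of this patch): the plane_id
 * produced by the helper below packs the stream index into the upper 16 bits and
 * the plane index into the lower 16 bits; for example, stream 1 / plane 2 yields
 * (1 << 16) | 2 = 0x00010002. dml21_get_dc_plane_idx_from_plane_id() in
 * dml21_utils.c later recovers the plane index by masking with 0xffff.
 */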
+static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, unsigned int *plane_id) +{ + int i, j; + + if (!plane_id) + return false; + + for (i = 0; i < context->stream_count; i++) { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + if (context->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; + return true; + } + } + } + + return false; +} + +static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *dml_ctx, const struct dc_stream_state *stream) +{ + int i = 0; + int location = -1; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] && dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i] == stream->stream_id) { + location = i; + break; + } + } + + return location; +} + +static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, + const struct dc_plane_state *plane, const struct dc_state *context) +{ + unsigned int plane_id; + int i = 0; + int location = -1; + + if (!dml21_wrapper_get_plane_id(context, plane, &plane_id)) { + ASSERT(false); + return -1; + } + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[i] && dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i] == plane_id) { + location = i; + break; + } + } + + return location; +} + +static enum dml2_uclk_pstate_change_strategy dml21_force_pstate_method_to_uclk_state_change_strategy(enum dml2_force_pstate_methods force_pstate_method) +{ + enum dml2_uclk_pstate_change_strategy val = dml2_uclk_pstate_change_strategy_auto; + + switch (force_pstate_method) { + case dml2_force_pstate_method_vactive: + val = dml2_uclk_pstate_change_strategy_force_vactive; + break; + case dml2_force_pstate_method_vblank: + val = dml2_uclk_pstate_change_strategy_force_vblank; + break; + case dml2_force_pstate_method_drr: + val = dml2_uclk_pstate_change_strategy_force_drr; + break; + case dml2_force_pstate_method_subvp: + val = dml2_uclk_pstate_change_strategy_force_mall_svp; + break; + case dml2_force_pstate_method_auto: + default: + val = dml2_uclk_pstate_change_strategy_auto; + } + + return val; +} + +bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + int stream_index, plane_index; + int disp_cfg_stream_location, disp_cfg_plane_location; + struct dml2_display_cfg *dml_dispcfg = &dml_ctx->v21.display_config; + + memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); + + dml_dispcfg->gpuvm_enable = true; + dml_dispcfg->gpuvm_max_page_table_levels = 4; + dml_dispcfg->hostvm_enable = false; + dml_dispcfg->minimize_det_reallocation = true; + dml_dispcfg->overrides.enable_subvp_implicit_pmo = true; + + for (stream_index = 0; stream_index < context->stream_count; stream_index++) { + disp_cfg_stream_location = map_stream_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]); + + if (disp_cfg_stream_location < 0) + disp_cfg_stream_location = dml_dispcfg->num_streams++; + + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index]); + 
populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); + populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index]); + + dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.fclk_pstate = dml2_twait_budgeting_setting_if_needed; + dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.uclk_pstate = dml2_twait_budgeting_setting_if_needed; + dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.stutter_enter_exit = dml2_twait_budgeting_setting_if_needed; + + dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_stream_location] = context->streams[stream_index]->stream_id; + dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_stream_location] = true; + + if (context->stream_status[stream_index].plane_count == 0) { + disp_cfg_plane_location = dml_dispcfg->num_planes++; + populate_dml21_dummy_surface_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->streams[stream_index]); + populate_dml21_dummy_plane_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->streams[stream_index]); + dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; + } else { + for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) { + disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->stream_status[stream_index].plane_states[plane_index], context); + + if (disp_cfg_plane_location < 0) + disp_cfg_plane_location = dml_dispcfg->num_planes++; + + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + + populate_dml21_surface_config_from_plane_state(in_dc, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->stream_status[stream_index].plane_states[plane_index]); + populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); + dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; + + if (dml21_wrapper_get_plane_id(context, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) + dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; + + /* apply forced pstate policy */ + if (dml_ctx->config.pmo.force_pstate_method_enable) { + dml_dispcfg->plane_descriptors[disp_cfg_plane_location].overrides.uclk_pstate_change_strategy = + dml21_force_pstate_method_to_uclk_state_change_strategy(dml_ctx->config.pmo.force_pstate_method_value); + } + } + } + } + + return true; +} + +void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context) +{ + /* TODO these should be the max of active, svp prefetch and idle should be tracked separately */ + context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dispclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.dcfclk_khz; + context->bw_ctx.bw.dcn.clk.dramclk_khz = 
in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.uclk_khz; + context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.fclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.deepsleep_dcfclk_khz; + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = in_ctx->v21.mode_programming.programming->fclk_pstate_supported; + context->bw_ctx.bw.dcn.clk.p_state_change_support = in_ctx->v21.mode_programming.programming->uclk_pstate_supported; + context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dtbrefclk_khz > 0; + context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dtbrefclk_khz; +} + +void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx) +{ + struct dml2_core_internal_display_mode_lib *mode_lib = &in_ctx->v21.dml_init.dml2_instance->core_instance.clean_me_up.mode_lib; + double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? (double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;; + + if (reg_set_idx >= DML2_DCHUB_WATERMARK_SET_NUM) { + /* invalid register set index */ + return; + } + + /* convert to legacy format (time in ns) */ + watermark->urgent_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0; + watermark->pte_meta_urgent_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0; + watermark->cstate_pstate.cstate_enter_plus_exit_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].sr_enter / refclk_freq_in_mhz) * 1000.0; + watermark->cstate_pstate.cstate_exit_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].sr_exit / refclk_freq_in_mhz) * 1000.0; + watermark->cstate_pstate.pstate_change_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].uclk_pstate / refclk_freq_in_mhz) * 1000.0; + watermark->urgent_latency_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0; + watermark->cstate_pstate.fclk_pstate_change_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].fclk_pstate / refclk_freq_in_mhz) * 1000.0; + watermark->frac_urg_bw_flip = in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].frac_urg_bw_flip; + watermark->frac_urg_bw_nom = in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].frac_urg_bw_nom; +} + +static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index) +{ + struct dml2_dchub_watermark_regs *wm_regs = NULL; + + switch (wm_index) { + case DML2_DCHUB_WATERMARK_SET_A: + wm_regs = &watermarks->dcn4.a; + break; + case DML2_DCHUB_WATERMARK_SET_B: + wm_regs = &watermarks->dcn4.b; + break; + case DML2_DCHUB_WATERMARK_SET_C: + wm_regs = &watermarks->dcn4.c; + break; + case DML2_DCHUB_WATERMARK_SET_D: + wm_regs = &watermarks->dcn4.d; + break; + case DML2_DCHUB_WATERMARK_SET_NUM: + default: + /* invalid wm set index */ + wm_regs = NULL; + } + + return wm_regs; +} + 
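/*
 * Editor's note (illustrative annotation, not part of this patch): the legacy
 * watermark extraction above converts raw register values, expressed in
 * dlg_ref_clk / dchub refclk cycles, into nanoseconds by dividing by the
 * reference clock in MHz and multiplying by 1000.0. Assuming a 50 MHz
 * reference clock, a raw urgent value of 100 cycles becomes
 * (100 / 50) * 1000.0 = 2000 ns. The same conversion is applied to the
 * stutter, p-state and fclk p-state fields, while frac_urg_bw_flip and
 * frac_urg_bw_nom are copied through unconverted.
 */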
+void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx) +{ + const struct dml2_display_cfg_programming *programming = in_ctx->v21.mode_programming.programming; + + unsigned int wm_index; + + /* copy watermark sets from DML */ + for (wm_index = 0; wm_index < programming->global_regs.num_watermark_sets; wm_index++) { + struct dml2_dchub_watermark_regs *wm_regs = wm_set_index_to_dc_wm_set(watermarks, wm_index); + + if (wm_regs) + memcpy(wm_regs, + &programming->global_regs.wm_regs[wm_index], + sizeof(struct dml2_dchub_watermark_regs)); + } +} + + +void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming) +{ + unsigned int hactive, vactive, hblank_start, vblank_start, hblank_end, vblank_end; + struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; + union dml2_global_sync_programming *global_sync = &stream_programming->global_sync; + + hactive = timing->h_addressable + timing->h_border_left + timing->h_border_right; + vactive = timing->v_addressable + timing->v_border_bottom + timing->v_border_top; + hblank_start = pipe_ctx->stream->timing.h_total - pipe_ctx->stream->timing.h_front_porch; + vblank_start = pipe_ctx->stream->timing.v_total - pipe_ctx->stream->timing.v_front_porch; + + hblank_end = hblank_start - timing->h_addressable - timing->h_border_left - timing->h_border_right; + vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom; + + if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { + /* phantom has its own global sync */ + global_sync = &stream_programming->phantom_stream.global_sync; + } + + pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4.vstartup_lines; + pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4.vupdate_offset_pixels; + pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4.vupdate_vupdate_width_pixels; + pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4.vready_offset_pixels; + + pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst; + + pipe_ctx->pipe_dlg_param.hactive = hactive; + pipe_ctx->pipe_dlg_param.vactive = vactive; + pipe_ctx->pipe_dlg_param.htotal = pipe_ctx->stream->timing.h_total; + pipe_ctx->pipe_dlg_param.vtotal = pipe_ctx->stream->timing.v_total; + pipe_ctx->pipe_dlg_param.hblank_end = hblank_end; + pipe_ctx->pipe_dlg_param.vblank_end = vblank_end; + pipe_ctx->pipe_dlg_param.hblank_start = hblank_start; + pipe_ctx->pipe_dlg_param.vblank_start = vblank_start; + pipe_ctx->pipe_dlg_param.vfront_porch = pipe_ctx->stream->timing.v_front_porch; + pipe_ctx->pipe_dlg_param.pixel_rate_mhz = pipe_ctx->stream->timing.pix_clk_100hz / 10000.00; + pipe_ctx->pipe_dlg_param.refresh_rate = ((timing->pix_clk_100hz * 100) / timing->h_total) / timing->v_total; + pipe_ctx->pipe_dlg_param.vtotal_max = pipe_ctx->stream->adjust.v_total_max; + pipe_ctx->pipe_dlg_param.vtotal_min = pipe_ctx->stream->adjust.v_total_min; + pipe_ctx->pipe_dlg_param.recout_height = pipe_ctx->plane_res.scl_data.recout.height; + pipe_ctx->pipe_dlg_param.recout_width = pipe_ctx->plane_res.scl_data.recout.width; + pipe_ctx->pipe_dlg_param.full_recout_height = pipe_ctx->plane_res.scl_data.recout.height; + pipe_ctx->pipe_dlg_param.full_recout_width = pipe_ctx->plane_res.scl_data.recout.width; +} + +void dml21_map_hw_resources(struct dml2_context *dml_ctx) +{ + unsigned int i = 0; 
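/*
 * Editor's note (descriptive comment, not part of this patch): the loop below
 * copies the display-config-to-stream and display-config-to-plane ID mappings
 * built during validation into the dml_pipe_idx_to_* arrays used for hardware
 * programming and marks every slot valid, effectively treating the display
 * config index and the DML pipe index as one-to-one.
 */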
+ + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[i] = dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] = true; + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[i] = dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] = true; + } + +} + +void dml21_get_pipe_mcache_config( + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dml2_per_plane_programming *pln_prog, + struct dml2_pipe_configuration_descriptor *mcache_pipe_config) +{ + mcache_pipe_config->plane0.viewport_x_start = pipe_ctx->plane_res.scl_data.viewport.x; + mcache_pipe_config->plane0.viewport_width = pipe_ctx->plane_res.scl_data.viewport.width; + + mcache_pipe_config->plane1.viewport_x_start = pipe_ctx->plane_res.scl_data.viewport_c.x; + mcache_pipe_config->plane1.viewport_width = pipe_ctx->plane_res.scl_data.viewport_c.width; + + mcache_pipe_config->plane1_enabled = + dml21_is_plane1_enabled(pln_prog->plane_descriptor->pixel_format); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h new file mode 100644 index 000000000000..4cc0a1fbb93d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef _DML21_TRANSLATION_HELPER_H_ +#define _DML21_TRANSLATION_HELPER_H_ + +struct dc; +struct dc_state; +struct dcn_watermarks; +union dcn_watermark_set; +struct pipe_ctx; + +struct dml2_context; +struct dml2_configuration_options; +struct dml2_initialize_instance_in_out; + +void dml21_apply_soc_bb_overrides(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); +void dml21_initialize_soc_bb_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); +void dml21_initialize_ip_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); +bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); +void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context); +void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming); +void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx); +void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); +void dml21_map_hw_resources(struct dml2_context *dml_ctx); +void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config); +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c new file mode 100644 index 000000000000..aa0cc4bb2b47 --- /dev/null +++ 
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -0,0 +1,531 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_internal_shared_types.h" +#include "dml21_translation_helper.h" +#include "dml2_internal_types.h" +#include "dml21_utils.h" +#include "dml2_dc_resource_mgmt.h" + +#include "dml2_core_dcn4_calcs.h" + + +int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id) +{ + int i; + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] && ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[i] == stream_id) + return i; + } + + return -1; +} + +int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id) +{ + int i; + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] && ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[i] == plane_id) + return i; + } + + return -1; +} + +bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id) +{ + int i, j; + + if (!plane_id) + return false; + + for (i = 0; i < state->stream_count; i++) { + for (j = 0; j < state->stream_status[i].plane_count; j++) { + if (state->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; + return true; + } + } + } + + return false; +} + +unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id) +{ + return 0xffff & plane_id; +} + +void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index) +{ + unsigned int i = 0; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + if (dml_ctx->v21.mode_programming.programming->plane_programming[i].plane_descriptor->stream_index == stream_index) { + *dml_pipe_idx = i; + return; + } + } +} + +void find_pipe_regs_idx(const struct dml2_context *dml_ctx, + struct pipe_ctx *pipe, unsigned int *pipe_regs_idx) +{ + struct pipe_ctx *opp_head = dml_ctx->config.callbacks.get_opp_head(pipe); + + *pipe_regs_idx = dml_ctx->config.callbacks.get_odm_slice_index(opp_head); + + if (pipe->plane_state) + *pipe_regs_idx += dml_ctx->config.callbacks.get_mpc_slice_index(pipe); +} + +/* places pipe references into pipes arrays and returns number of pipes */ +int dml21_find_dc_pipes_for_plane(const struct dc *in_dc, + struct dc_state *context, + struct dml2_context *dml_ctx, + struct pipe_ctx **dc_main_pipes, + struct pipe_ctx **dc_phantom_pipes, + int dml_plane_idx) +{ + unsigned int dml_stream_index; + unsigned int main_stream_id; + unsigned int dc_plane_index; + struct dc_stream_state *dc_main_stream; + struct dc_stream_status *dc_main_stream_status; + struct dc_plane_state *dc_main_plane; + struct dc_stream_state *dc_phantom_stream; + struct dc_stream_status *dc_phantom_stream_status; + struct dc_plane_state *dc_phantom_plane; + int num_pipes = 0; + + dml_stream_index = dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_idx].plane_descriptor->stream_index; + main_stream_id = dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_stream_index]; + + dc_main_stream = dml_ctx->config.callbacks.get_stream_from_id(context, main_stream_id); + dc_main_stream_status = dml_ctx->config.callbacks.get_stream_status(context, dc_main_stream); + + /* find main plane based on id */ + dc_plane_index = 
dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[dml_plane_idx]); + dc_main_plane = dc_main_stream_status->plane_states[dc_plane_index]; + + if (dc_main_plane) { + num_pipes = dml_ctx->config.callbacks.get_dpp_pipes_for_plane(dc_main_plane, &context->res_ctx, dc_main_pipes); + } else { + /* stream was configured with dummy plane, so get pipes from opp head */ + struct pipe_ctx *otg_master_pipe = dml_ctx->config.callbacks.get_otg_master_for_stream(&context->res_ctx, dc_main_stream); + num_pipes = dml_ctx->config.callbacks.get_opp_heads_for_otg_master(otg_master_pipe, &context->res_ctx, dc_main_pipes); + } + + /* if phantom exists, find associated pipes */ + dc_phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, dc_main_stream); + if (dc_phantom_stream && num_pipes > 0) { + dc_phantom_stream_status = dml_ctx->config.callbacks.get_stream_status(context, dc_phantom_stream); + + /* phantom plane will have same index as main */ + dc_phantom_plane = dc_phantom_stream_status->plane_states[dc_plane_index]; + + dml_ctx->config.callbacks.get_dpp_pipes_for_plane(dc_phantom_plane, &context->res_ctx, dc_phantom_pipes); + } + + return num_pipes; +} + + +void dml21_update_pipe_ctx_dchub_regs(struct dml2_display_rq_regs *rq_regs, + struct dml2_display_dlg_regs *disp_dlg_regs, + struct dml2_display_ttu_regs *disp_ttu_regs, + struct pipe_ctx *out) +{ + memset(&out->rq_regs, 0, sizeof(out->rq_regs)); + out->rq_regs.rq_regs_l.chunk_size = rq_regs->rq_regs_l.chunk_size; + out->rq_regs.rq_regs_l.min_chunk_size = rq_regs->rq_regs_l.min_chunk_size; + //out->rq_regs.rq_regs_l.meta_chunk_size = rq_regs->rq_regs_l.meta_chunk_size; + //out->rq_regs.rq_regs_l.min_meta_chunk_size = rq_regs->rq_regs_l.min_meta_chunk_size; + out->rq_regs.rq_regs_l.dpte_group_size = rq_regs->rq_regs_l.dpte_group_size; + out->rq_regs.rq_regs_l.mpte_group_size = rq_regs->rq_regs_l.mpte_group_size; + out->rq_regs.rq_regs_l.swath_height = rq_regs->rq_regs_l.swath_height; + out->rq_regs.rq_regs_l.pte_row_height_linear = rq_regs->rq_regs_l.pte_row_height_linear; + + out->rq_regs.rq_regs_c.chunk_size = rq_regs->rq_regs_c.chunk_size; + out->rq_regs.rq_regs_c.min_chunk_size = rq_regs->rq_regs_c.min_chunk_size; + //out->rq_regs.rq_regs_c.meta_chunk_size = rq_regs->rq_regs_c.meta_chunk_size; + //out->rq_regs.rq_regs_c.min_meta_chunk_size = rq_regs->rq_regs_c.min_meta_chunk_size; + out->rq_regs.rq_regs_c.dpte_group_size = rq_regs->rq_regs_c.dpte_group_size; + out->rq_regs.rq_regs_c.mpte_group_size = rq_regs->rq_regs_c.mpte_group_size; + out->rq_regs.rq_regs_c.swath_height = rq_regs->rq_regs_c.swath_height; + out->rq_regs.rq_regs_c.pte_row_height_linear = rq_regs->rq_regs_c.pte_row_height_linear; + + out->rq_regs.drq_expansion_mode = rq_regs->drq_expansion_mode; + out->rq_regs.prq_expansion_mode = rq_regs->prq_expansion_mode; + //out->rq_regs.mrq_expansion_mode = rq_regs->mrq_expansion_mode; + out->rq_regs.crq_expansion_mode = rq_regs->crq_expansion_mode; + out->rq_regs.plane1_base_address = rq_regs->plane1_base_address; + out->unbounded_req = rq_regs->unbounded_request_enabled; + + memset(&out->dlg_regs, 0, sizeof(out->dlg_regs)); + out->dlg_regs.refcyc_h_blank_end = disp_dlg_regs->refcyc_h_blank_end; + out->dlg_regs.dlg_vblank_end = disp_dlg_regs->dlg_vblank_end; + out->dlg_regs.min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start; + out->dlg_regs.refcyc_per_htotal = disp_dlg_regs->refcyc_per_htotal; + out->dlg_regs.refcyc_x_after_scaler = 
disp_dlg_regs->refcyc_x_after_scaler; + out->dlg_regs.dst_y_after_scaler = disp_dlg_regs->dst_y_after_scaler; + out->dlg_regs.dst_y_prefetch = disp_dlg_regs->dst_y_prefetch; + out->dlg_regs.dst_y_per_vm_vblank = disp_dlg_regs->dst_y_per_vm_vblank; + out->dlg_regs.dst_y_per_row_vblank = disp_dlg_regs->dst_y_per_row_vblank; + out->dlg_regs.dst_y_per_vm_flip = disp_dlg_regs->dst_y_per_vm_flip; + out->dlg_regs.dst_y_per_row_flip = disp_dlg_regs->dst_y_per_row_flip; + out->dlg_regs.ref_freq_to_pix_freq = disp_dlg_regs->ref_freq_to_pix_freq; + out->dlg_regs.vratio_prefetch = disp_dlg_regs->vratio_prefetch; + out->dlg_regs.vratio_prefetch_c = disp_dlg_regs->vratio_prefetch_c; + out->dlg_regs.refcyc_per_tdlut_group = disp_dlg_regs->refcyc_per_tdlut_group; + out->dlg_regs.refcyc_per_pte_group_vblank_l = disp_dlg_regs->refcyc_per_pte_group_vblank_l; + out->dlg_regs.refcyc_per_pte_group_vblank_c = disp_dlg_regs->refcyc_per_pte_group_vblank_c; + //out->dlg_regs.refcyc_per_meta_chunk_vblank_l = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; + //out->dlg_regs.refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_c; + out->dlg_regs.refcyc_per_pte_group_flip_l = disp_dlg_regs->refcyc_per_pte_group_flip_l; + out->dlg_regs.refcyc_per_pte_group_flip_c = disp_dlg_regs->refcyc_per_pte_group_flip_c; + //out->dlg_regs.refcyc_per_meta_chunk_flip_l = disp_dlg_regs->refcyc_per_meta_chunk_flip_l; + //out->dlg_regs.refcyc_per_meta_chunk_flip_c = disp_dlg_regs->refcyc_per_meta_chunk_flip_c; + out->dlg_regs.dst_y_per_pte_row_nom_l = disp_dlg_regs->dst_y_per_pte_row_nom_l; + out->dlg_regs.dst_y_per_pte_row_nom_c = disp_dlg_regs->dst_y_per_pte_row_nom_c; + out->dlg_regs.refcyc_per_pte_group_nom_l = disp_dlg_regs->refcyc_per_pte_group_nom_l; + out->dlg_regs.refcyc_per_pte_group_nom_c = disp_dlg_regs->refcyc_per_pte_group_nom_c; + //out->dlg_regs.dst_y_per_meta_row_nom_l = disp_dlg_regs->dst_y_per_meta_row_nom_l; + //out->dlg_regs.dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_c; + //out->dlg_regs.refcyc_per_meta_chunk_nom_l = disp_dlg_regs->refcyc_per_meta_chunk_nom_l; + //out->dlg_regs.refcyc_per_meta_chunk_nom_c = disp_dlg_regs->refcyc_per_meta_chunk_nom_c; + out->dlg_regs.refcyc_per_line_delivery_pre_l = disp_dlg_regs->refcyc_per_line_delivery_pre_l; + out->dlg_regs.refcyc_per_line_delivery_pre_c = disp_dlg_regs->refcyc_per_line_delivery_pre_c; + out->dlg_regs.refcyc_per_line_delivery_l = disp_dlg_regs->refcyc_per_line_delivery_l; + out->dlg_regs.refcyc_per_line_delivery_c = disp_dlg_regs->refcyc_per_line_delivery_c; + out->dlg_regs.refcyc_per_vm_group_vblank = disp_dlg_regs->refcyc_per_vm_group_vblank; + out->dlg_regs.refcyc_per_vm_group_flip = disp_dlg_regs->refcyc_per_vm_group_flip; + out->dlg_regs.refcyc_per_vm_req_vblank = disp_dlg_regs->refcyc_per_vm_req_vblank; + out->dlg_regs.refcyc_per_vm_req_flip = disp_dlg_regs->refcyc_per_vm_req_flip; + out->dlg_regs.dst_y_offset_cur0 = disp_dlg_regs->dst_y_offset_cur0; + out->dlg_regs.chunk_hdl_adjust_cur0 = disp_dlg_regs->chunk_hdl_adjust_cur0; + //out->dlg_regs.dst_y_offset_cur1 = disp_dlg_regs->dst_y_offset_cur1; + //out->dlg_regs.chunk_hdl_adjust_cur1 = disp_dlg_regs->chunk_hdl_adjust_cur1; + out->dlg_regs.vready_after_vcount0 = disp_dlg_regs->vready_after_vcount0; + out->dlg_regs.dst_y_delta_drq_limit = disp_dlg_regs->dst_y_delta_drq_limit; + out->dlg_regs.refcyc_per_vm_dmdata = disp_dlg_regs->refcyc_per_vm_dmdata; + out->dlg_regs.dmdata_dl_delta = disp_dlg_regs->dmdata_dl_delta; + + memset(&out->ttu_regs, 0, 
sizeof(out->ttu_regs)); + out->ttu_regs.qos_level_low_wm = disp_ttu_regs->qos_level_low_wm; + out->ttu_regs.qos_level_high_wm = disp_ttu_regs->qos_level_high_wm; + out->ttu_regs.min_ttu_vblank = disp_ttu_regs->min_ttu_vblank; + out->ttu_regs.qos_level_flip = disp_ttu_regs->qos_level_flip; + out->ttu_regs.refcyc_per_req_delivery_l = disp_ttu_regs->refcyc_per_req_delivery_l; + out->ttu_regs.refcyc_per_req_delivery_c = disp_ttu_regs->refcyc_per_req_delivery_c; + out->ttu_regs.refcyc_per_req_delivery_cur0 = disp_ttu_regs->refcyc_per_req_delivery_cur0; + //out->ttu_regs.refcyc_per_req_delivery_cur1 = disp_ttu_regs->refcyc_per_req_delivery_cur1; + out->ttu_regs.refcyc_per_req_delivery_pre_l = disp_ttu_regs->refcyc_per_req_delivery_pre_l; + out->ttu_regs.refcyc_per_req_delivery_pre_c = disp_ttu_regs->refcyc_per_req_delivery_pre_c; + out->ttu_regs.refcyc_per_req_delivery_pre_cur0 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur0; + //out->ttu_regs.refcyc_per_req_delivery_pre_cur1 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur1; + out->ttu_regs.qos_level_fixed_l = disp_ttu_regs->qos_level_fixed_l; + out->ttu_regs.qos_level_fixed_c = disp_ttu_regs->qos_level_fixed_c; + out->ttu_regs.qos_level_fixed_cur0 = disp_ttu_regs->qos_level_fixed_cur0; + //out->ttu_regs.qos_level_fixed_cur1 = disp_ttu_regs->qos_level_fixed_cur1; + out->ttu_regs.qos_ramp_disable_l = disp_ttu_regs->qos_ramp_disable_l; + out->ttu_regs.qos_ramp_disable_c = disp_ttu_regs->qos_ramp_disable_c; + out->ttu_regs.qos_ramp_disable_cur0 = disp_ttu_regs->qos_ramp_disable_cur0; + //out->ttu_regs.qos_ramp_disable_cur1 = disp_ttu_regs->qos_ramp_disable_cur1; +} + +void dml21_populate_mall_allocation_size(struct dc_state *context, + struct dml2_context *in_ctx, + struct dml2_per_plane_programming *pln_prog, + struct pipe_ctx *dc_pipe) +{ + + /* Reuse MALL Allocation Sizes logic from dcn32_fpu.c */ + /* Count from active, top pipes per plane only. Only add mall_ss_size_bytes for each unique plane. */ + if (dc_pipe->stream && dc_pipe->plane_state && + (dc_pipe->top_pipe == NULL || + dc_pipe->plane_state != dc_pipe->top_pipe->plane_state) && + dc_pipe->prev_odm_pipe == NULL) { + /* SS: all active surfaces stored in MALL */ + if (in_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, dc_pipe) != SUBVP_PHANTOM) { + dc_pipe->surface_size_in_mall_bytes = pln_prog->surface_size_mall_bytes; + context->bw_ctx.bw.dcn.mall_ss_size_bytes += dc_pipe->surface_size_in_mall_bytes; + } else { + /* SUBVP: phantom surfaces only stored in MALL */ + dc_pipe->surface_size_in_mall_bytes = pln_prog->svp_size_mall_bytes; + context->bw_ctx.bw.dcn.mall_subvp_size_bytes += dc_pipe->surface_size_in_mall_bytes; + } + } +} + +bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) +{ + /* If this assert is hit then we have a link encoder dynamic management issue */ + ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? 
pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); + return (pipe_ctx->stream_res.hpo_dp_stream_enc && + pipe_ctx->link_res.hpo_dp_link_enc && + dc_is_dp_signal(pipe_ctx->stream->signal)); +} + +void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, + struct dml2_per_stream_programming *stream_prog) +{ + unsigned int pipe_reg_index = 0; + + dml21_populate_pipe_ctx_dlg_params(dml_ctx, context, pipe_ctx, stream_prog); + find_pipe_regs_idx(dml_ctx, pipe_ctx, &pipe_reg_index); + + if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { + memcpy(&pipe_ctx->hubp_regs, pln_prog->phantom_plane.pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); + pipe_ctx->unbounded_req = false; + + /* legacy only, should be removed later */ + dml21_update_pipe_ctx_dchub_regs(&pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->rq_regs, + &pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->dlg_regs, + &pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->ttu_regs, pipe_ctx); + + pipe_ctx->det_buffer_size_kb = 0; + } else { + memcpy(&pipe_ctx->hubp_regs, pln_prog->pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set)); + pipe_ctx->unbounded_req = pln_prog->pipe_regs[pipe_reg_index]->rq_regs.unbounded_request_enabled; + + /* legacy only, should be removed later */ + dml21_update_pipe_ctx_dchub_regs(&pln_prog->pipe_regs[pipe_reg_index]->rq_regs, + &pln_prog->pipe_regs[pipe_reg_index]->dlg_regs, + &pln_prog->pipe_regs[pipe_reg_index]->ttu_regs, pipe_ctx); + + pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64; + } + + pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4.dppclk_khz; + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipe_ctx->plane_res.bw.dppclk_khz) + context->bw_ctx.bw.dcn.clk.dppclk_khz = pipe_ctx->plane_res.bw.dppclk_khz; + + dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx); + memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[pipe_ctx->pipe_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation)); +} + +static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx, + const struct dc *dc, + struct dc_state *context, + struct dc_stream_state *main_stream, + struct dml2_per_stream_programming *stream_programming) +{ + struct dc_stream_state *phantom_stream; + struct dml2_stream_parameters *phantom_stream_descriptor = &stream_programming->phantom_stream.descriptor; + + phantom_stream = dml_ctx->config.svp_pstate.callbacks.create_phantom_stream(dc, context, main_stream); + + /* copy details of phantom stream from main */ + memcpy(&phantom_stream->timing, &main_stream->timing, sizeof(phantom_stream->timing)); + memcpy(&phantom_stream->src, &main_stream->src, sizeof(phantom_stream->src)); + memcpy(&phantom_stream->dst, &main_stream->dst, sizeof(phantom_stream->dst)); + + /* modify timing for phantom */ + phantom_stream->timing.v_front_porch = phantom_stream_descriptor->timing.v_front_porch; + phantom_stream->timing.v_addressable = phantom_stream_descriptor->timing.v_active; + phantom_stream->timing.v_total = phantom_stream_descriptor->timing.v_total; + phantom_stream->timing.flags.DSC = 0; // phantom always has DSC disabled + + phantom_stream->dst.y = 0; + phantom_stream->dst.height = stream_programming->phantom_stream.descriptor.timing.v_active; + + phantom_stream->src.y = 0; + phantom_stream->src.height = 
(double)phantom_stream_descriptor->timing.v_active * (double)main_stream->src.height / (double)main_stream->dst.height; + + phantom_stream->use_dynamic_meta = false; + + dml_ctx->config.svp_pstate.callbacks.add_phantom_stream(dc, context, phantom_stream, main_stream); + + return phantom_stream; +} + +static struct dc_plane_state *dml21_add_phantom_plane(struct dml2_context *dml_ctx, + const struct dc *dc, + struct dc_state *context, + struct dc_stream_state *phantom_stream, + struct dc_plane_state *main_plane, + struct dml2_per_plane_programming *plane_programming) +{ + struct dc_plane_state *phantom_plane; + + phantom_plane = dml_ctx->config.svp_pstate.callbacks.create_phantom_plane(dc, context, main_plane); + + phantom_plane->format = main_plane->format; + phantom_plane->rotation = main_plane->rotation; + phantom_plane->visible = main_plane->visible; + + memcpy(&phantom_plane->address, &main_plane->address, sizeof(phantom_plane->address)); + memcpy(&phantom_plane->scaling_quality, &main_plane->scaling_quality, + sizeof(phantom_plane->scaling_quality)); + memcpy(&phantom_plane->src_rect, &main_plane->src_rect, sizeof(phantom_plane->src_rect)); + memcpy(&phantom_plane->dst_rect, &main_plane->dst_rect, sizeof(phantom_plane->dst_rect)); + memcpy(&phantom_plane->clip_rect, &main_plane->clip_rect, sizeof(phantom_plane->clip_rect)); + memcpy(&phantom_plane->plane_size, &main_plane->plane_size, + sizeof(phantom_plane->plane_size)); + memcpy(&phantom_plane->tiling_info, &main_plane->tiling_info, + sizeof(phantom_plane->tiling_info)); + memcpy(&phantom_plane->dcc, &main_plane->dcc, sizeof(phantom_plane->dcc)); + + phantom_plane->format = main_plane->format; + phantom_plane->rotation = main_plane->rotation; + phantom_plane->visible = main_plane->visible; + + /* Shadow pipe has small viewport. 
*/ + phantom_plane->clip_rect.y = 0; + phantom_plane->clip_rect.height = phantom_stream->src.height; + + dml_ctx->config.svp_pstate.callbacks.add_phantom_plane(dc, phantom_stream, phantom_plane, context); + + return phantom_plane; +} + +void dml21_handle_phantom_streams_planes(const struct dc *dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + unsigned int dml_stream_index, dml_plane_index, dc_plane_index; + struct dc_stream_state *main_stream; + struct dc_stream_status *main_stream_status; + struct dc_stream_state *phantom_stream; + struct dc_plane_state *main_plane; + bool phantoms_added = false; + + /* create phantom streams and planes and add to context */ + for (dml_stream_index = 0; dml_stream_index < dml_ctx->v21.mode_programming.programming->display_config.num_streams; dml_stream_index++) { + /* iterate through DML streams looking for phantoms */ + if (dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_index].phantom_stream.enabled) { + /* find associated dc stream */ + main_stream = dml_ctx->config.callbacks.get_stream_from_id(context, + dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_stream_index]); + + main_stream_status = dml_ctx->config.callbacks.get_stream_status(context, main_stream); + + if (main_stream_status->plane_count == 0) + continue; + + /* create phantom stream for subvp enabled stream */ + phantom_stream = dml21_add_phantom_stream(dml_ctx, + dc, + context, + main_stream, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_index]); + + /* iterate through DML planes associated with this stream */ + for (dml_plane_index = 0; dml_plane_index < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_plane_index++) { + if (dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_index].plane_descriptor->stream_index == dml_stream_index) { + /* find associated dc plane */ + dc_plane_index = dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[dml_plane_index]); + main_plane = main_stream_status->plane_states[dc_plane_index]; + + /* create phantom planes for subvp enabled plane */ + dml21_add_phantom_plane(dml_ctx, + dc, + context, + phantom_stream, + main_plane, + &dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_index]); + + phantoms_added = true; + } + } + } + } + + if (phantoms_added) + dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, dc->current_state); +} + +void dml21_build_fams2_programming(const struct dc *dc, + struct dc_state *context, + struct dml2_context *dml_ctx) +{ + int i, j, k; + + /* reset fams2 data */ + context->bw_ctx.bw.dcn.fams2_stream_count = 0; + memset(&context->bw_ctx.bw.dcn.fams2_stream_params, 0, sizeof(struct dmub_fams2_stream_static_state) * DML2_MAX_PLANES); + + if (!dml_ctx->v21.mode_programming.programming->fams2_required) + return; + + for (i = 0; i < context->stream_count; i++) { + int dml_stream_idx; + struct dc_stream_state *phantom_stream; + struct dc_stream_status *phantom_status; + + struct dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[context->bw_ctx.bw.dcn.fams2_stream_count]; + + struct dc_stream_state *stream = context->streams[i]; + + if (context->stream_status[i].plane_count == 0 || + dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { + /* can ignore blanked or phantom streams */ + continue; + } + + dml_stream_idx = 
dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); + ASSERT(dml_stream_idx >= 0); + + /* copy static state from PMO */ + memcpy(static_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params, + sizeof(struct dmub_fams2_stream_static_state)); + + /* get information from context */ + static_state->num_planes = context->stream_status[i].plane_count; + static_state->otg_inst = context->stream_status[i].primary_otg_inst; + + /* populate pipe masks for planes */ + for (j = 0; j < context->stream_status[i].plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { + static_state->pipe_mask |= (1 << k); + static_state->plane_pipe_masks[j] |= (1 << k); + } + } + } + + /* get per method programming */ + switch (static_state->type) { + case FAMS2_STREAM_TYPE_VBLANK: + case FAMS2_STREAM_TYPE_VACTIVE: + case FAMS2_STREAM_TYPE_DRR: + break; + case FAMS2_STREAM_TYPE_SUBVP: + phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); + phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); + + /* phantom status should always be present */ + ASSERT(phantom_status); + static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + + /* populate pipe masks for phantom planes */ + for (j = 0; j < phantom_status->plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { + static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k); + static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + } + } + } + break; + default: + ASSERT(false); + break; + } + + context->bw_ctx.bw.dcn.fams2_stream_count++; + } + + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_stream_count > 0; +} + +bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format) +{ + return source_format >= dml2_420_8 && source_format <= dml2_rgbe_alpha; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h new file mode 100644 index 000000000000..82080397a50e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
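/*
 * Plane IDs handled by dml21_get_plane_id() and
 * dml21_get_dc_plane_idx_from_plane_id() pack the stream index into the upper
 * 16 bits and the plane index within that stream into the lower 16 bits. A
 * minimal standalone sketch of that encoding (illustrative helper names, not
 * part of this header):
 *
 *   static unsigned int pack_plane_id(unsigned int stream_idx, unsigned int plane_idx)
 *   {
 *           return (stream_idx << 16) | plane_idx;
 *   }
 *
 *   static unsigned int plane_idx_from_plane_id(unsigned int plane_id)
 *   {
 *           return plane_id & 0xffff; // same mask as dml21_get_dc_plane_idx_from_plane_id()
 *   }
 */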
+ + +#ifndef _DML21_UTILS_H_ +#define _DML21_UTILS_H_ + +struct dc_state; +struct dc_plane_state; +struct pipe_ctx; + +struct dml2_context; +struct dml2_display_rq_regs; +struct dml2_display_dlg_regs; +struct dml2_display_ttu_regs; + +int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id); +int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id); +bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id); +void dml21_update_pipe_ctx_dchub_regs(struct dml2_display_rq_regs *rq_regs, + struct dml2_display_dlg_regs *disp_dlg_regs, + struct dml2_display_ttu_regs *disp_ttu_regs, + struct pipe_ctx *out); +void dml21_populate_mall_allocation_size(struct dc_state *context, + struct dml2_context *in_ctx, + struct dml2_per_plane_programming *pln_prog, + struct pipe_ctx *dc_pipe); +bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx); +void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index); +void find_pipe_regs_idx(const struct dml2_context *dml_ctx, + struct pipe_ctx *pipe, unsigned int *pipe_regs_idx); +int dml21_find_dc_pipes_for_plane(const struct dc *in_dc, + struct dc_state *context, + struct dml2_context *dml_ctx, + struct pipe_ctx **dc_main_pipes, + struct pipe_ctx **dc_phantom_pipes, + int dml_plane_idx); +void dml21_program_dc_pipe(struct dml2_context *dml_ctx, + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dml2_per_plane_programming *pln_prog, + struct dml2_per_stream_programming *stream_prog); +void dml21_handle_phantom_streams_planes(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); +unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id); +void dml21_build_fams2_programming(const struct dc *dc, + struct dc_state *context, + struct dml2_context *dml_ctx); +bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format); +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c new file mode 100644 index 000000000000..f88a6fea5934 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
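/*
 * Rough call flow for the entry points defined in this file, as invoked from
 * DC (minimal sketch; the local variable names here are hypothetical):
 *
 *   struct dml2_context *ctx = NULL;
 *
 *   if (dml21_create(dc, &ctx, &config)) {                      // allocate and initialize the DML2.1 instance
 *           supported = dml21_validate(dc, state, ctx, true);   // fast_validate: mode support check only
 *           supported = dml21_validate(dc, state, ctx, false);  // full path: support check plus HW programming
 *           dml21_prepare_mcache_programming(dc, state, ctx);   // per-plane, per-pipe mcache registers
 *   }
 *
 * With fast_validate == false, dml21_mode_check_and_programming() also copies
 * watermarks, clocks and FAMS2 stream programming into the dc_state.
 */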
+ + +#include "dml2_internal_types.h" +#include "dml_top.h" +#include "dml2_core_dcn4_calcs.h" +#include "dml2_internal_shared_types.h" +#include "dml21_utils.h" +#include "dml21_translation_helper.h" +#include "dml2_dc_resource_mgmt.h" + +static bool dml21_allocate_memory(struct dml2_context **dml_ctx) +{ + *dml_ctx = (struct dml2_context *)kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + if (!(*dml_ctx)) + return false; + + (*dml_ctx)->v21.dml_init.dml2_instance = (struct dml2_instance *)kzalloc(sizeof(struct dml2_instance), GFP_KERNEL); + if (!((*dml_ctx)->v21.dml_init.dml2_instance)) + return false; + + (*dml_ctx)->v21.mode_support.dml2_instance = (*dml_ctx)->v21.dml_init.dml2_instance; + (*dml_ctx)->v21.mode_programming.dml2_instance = (*dml_ctx)->v21.dml_init.dml2_instance; + + (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; + (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; + + (*dml_ctx)->v21.mode_programming.programming = (struct dml2_display_cfg_programming *)kzalloc(sizeof(struct dml2_display_cfg_programming), GFP_KERNEL); + if (!((*dml_ctx)->v21.mode_programming.programming)) + return false; + + return true; +} + +static void dml21_apply_debug_options(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config) +{ + bool disable_fams2; + struct dml2_pmo_options *pmo_options = &dml_ctx->v21.dml_init.options.pmo_options; + + /* ODM options */ + pmo_options->disable_dyn_odm = !config->minimize_dispclk_using_odm; + pmo_options->disable_dyn_odm_for_multi_stream = true; + pmo_options->disable_dyn_odm_for_stream_with_svp = true; + + /* UCLK P-State options */ + if (in_dc->debug.dml21_force_pstate_method) { + dml_ctx->config.pmo.force_pstate_method_enable = true; + dml_ctx->config.pmo.force_pstate_method_value = in_dc->debug.dml21_force_pstate_method_value; + } else { + dml_ctx->config.pmo.force_pstate_method_enable = false; + } + + pmo_options->disable_vblank = ((in_dc->debug.dml21_disable_pstate_method_mask >> 1) & 1); + + /* NOTE: DRR and SubVP Require FAMS2 */ + disable_fams2 = !in_dc->debug.fams2_config.bits.enable; + pmo_options->disable_svp = ((in_dc->debug.dml21_disable_pstate_method_mask >> 2) & 1) || + in_dc->debug.force_disable_subvp || + disable_fams2; + pmo_options->disable_drr_fixed = ((in_dc->debug.dml21_disable_pstate_method_mask >> 3) & 1) || + disable_fams2; + pmo_options->disable_drr_var = ((in_dc->debug.dml21_disable_pstate_method_mask >> 4) & 1) || + disable_fams2; + pmo_options->disable_fams2 = disable_fams2; + + pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming; +} + +static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) +{ + switch (in_dc->ctx->dce_version) { + case DCN_VERSION_4_01: + case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete. 
+ (*dml_ctx)->v21.dml_init.options.project_id = dml2_project_dcn4x_stage2_auto_drr_svp; + break; + default: + (*dml_ctx)->v21.dml_init.options.project_id = dml2_project_invalid; + } + + (*dml_ctx)->architecture = dml2_architecture_21; + + /* Store configuration options */ + (*dml_ctx)->config = *config; + + /*Initialize SOCBB and DCNIP params */ + dml21_initialize_soc_bb_params(&(*dml_ctx)->v21.dml_init, config, in_dc); + dml21_initialize_ip_params(&(*dml_ctx)->v21.dml_init, config, in_dc); + dml21_apply_soc_bb_overrides(&(*dml_ctx)->v21.dml_init, config, in_dc); + + /* apply debug overrides */ + dml21_apply_debug_options(in_dc, *dml_ctx, config); + + /*Initialize DML21 instance */ + dml2_initialize_instance(&(*dml_ctx)->v21.dml_init); +} + +bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) +{ + /* Allocate memory for initializing DML21 instance */ + if (!dml21_allocate_memory(dml_ctx)) + return false; + + dml21_init(in_dc, dml_ctx, config); + + return true; +} + +static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, + struct dml2_context *in_ctx, unsigned int pipe_cnt) +{ + unsigned int dml_prog_idx = 0, dc_pipe_index = 0, num_dpps_required = 0; + struct dml2_per_plane_programming *pln_prog = NULL; + struct dml2_per_stream_programming *stream_prog = NULL; + struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + int num_pipes; + + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + + /* copy global DCHUBBUB arbiter registers */ + memcpy(&context->bw_ctx.bw.dcn.arb_regs, &in_ctx->v21.mode_programming.programming->global_regs.arb_regs, sizeof(struct dml2_display_arb_regs)); + + /* legacy only */ + context->bw_ctx.bw.dcn.compbuf_size_kb = (int)in_ctx->v21.mode_programming.programming->global_regs.arb_regs.compbuf_size * 64; + + context->bw_ctx.bw.dcn.mall_ss_size_bytes = 0; + context->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes = 0; + context->bw_ctx.bw.dcn.mall_subvp_size_bytes = 0; + + for (dml_prog_idx = 0; dml_prog_idx < DML2_MAX_PLANES; dml_prog_idx++) { + pln_prog = &in_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx]; + + if (!pln_prog->plane_descriptor) + continue; + + stream_prog = &in_ctx->v21.mode_programming.programming->stream_programming[pln_prog->plane_descriptor->stream_index]; + num_dpps_required = pln_prog->num_dpps_required; + + if (num_dpps_required == 0) { + continue; + } + num_pipes = dml21_find_dc_pipes_for_plane(dc, context, in_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx); + + if (num_pipes <= 0) + continue; + + /* program each pipe */ + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + dml21_program_dc_pipe(in_ctx, context, dc_main_pipes[dc_pipe_index], pln_prog, stream_prog); + + if (pln_prog->phantom_plane.valid) { + dml21_program_dc_pipe(in_ctx, context, dc_phantom_pipes[dc_pipe_index], pln_prog, stream_prog); + } + } + } + + /* assign global clocks */ + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; + if (in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.num_clk_values > 1) { + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = + in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.clk_values_khz[in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.num_clk_values] * 1000; + } 
else { + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.clk_values_khz[0] * 1000; + } + + if (in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.num_clk_values > 1) { + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = + in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.clk_values_khz[in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.num_clk_values] * 1000; + } else { + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.clk_values_khz[0] * 1000; + } + + /* get global mall allocation */ + if (dc->res_pool->funcs->calculate_mall_ways_from_bytes) { + context->bw_ctx.bw.dcn.clk.num_ways = dc->res_pool->funcs->calculate_mall_ways_from_bytes(dc, context->bw_ctx.bw.dcn.mall_subvp_size_bytes); + } else { + context->bw_ctx.bw.dcn.clk.num_ways = 0; + } +} + +static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + bool result = false; + struct dml2_build_mode_programming_in_out *mode_programming = &dml_ctx->v21.mode_programming; + + memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg)); + memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); + memset(&dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params, 0, sizeof(struct dml2_core_mode_programming_in_out)); + + if (!context || context->stream_count == 0) + return true; + + /* scrub phantom's from current dc_state */ + dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context); + dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context); + + /* Populate stream, plane mappings and other fields in display config. 
*/ + result = dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); + if (!result) + return false; + + result = dml2_build_mode_programming(mode_programming); + if (!result) + return false; + + /* Check and map HW resources */ + if (result && !dml_ctx->config.skip_hw_state_mapping) { + dml21_map_hw_resources(dml_ctx); + dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, in_dc->current_state); + /* if subvp phantoms are present, expand them into dc context */ + dml21_handle_phantom_streams_planes(in_dc, context, dml_ctx); + } + + /* Copy DML CLK, WM and REG outputs to bandwidth context */ + if (result && !dml_ctx->config.skip_hw_state_mapping) { + dml21_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml_ctx, in_dc->res_pool->pipe_count); + dml21_copy_clocks_to_dc_state(dml_ctx, context); + dml21_extract_watermark_sets(in_dc, &context->bw_ctx.bw.dcn.watermarks, dml_ctx); + if (in_dc->ctx->dce_version == DCN_VERSION_3_2) { + dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.a, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); + dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.b, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); + dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.c, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); + dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.d, DML2_DCHUB_WATERMARK_SET_A, dml_ctx); + } + + dml21_build_fams2_programming(in_dc, context, dml_ctx); + } + + return true; +} + +static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + bool is_supported = false; + struct dml2_initialize_instance_in_out *dml_init = &dml_ctx->v21.dml_init; + struct dml2_check_mode_supported_in_out *mode_support = &dml_ctx->v21.mode_support; + + memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg)); + memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); + memset(&dml_ctx->v21.mode_programming.dml2_instance->scratch.check_mode_supported_locals.mode_support_params, 0, sizeof(struct dml2_core_mode_support_in_out)); + + if (!context || context->stream_count == 0) + return true; + + /* Scrub phantom's from current dc_state */ + dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context); + dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context); + + mode_support->dml2_instance = dml_init->dml2_instance; + dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); + is_supported = dml2_check_mode_supported(mode_support); + if (!is_supported) + return false; + + return true; +} + +bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx, bool fast_validate) +{ + bool out = false; + + /* Use dml_validate_only for fast_validate path */ + if (fast_validate) { + out = dml21_check_mode_support(in_dc, context, dml_ctx); + } else + out = dml21_mode_check_and_programming(in_dc, context, dml_ctx); + return out; +} + +void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx) +{ + unsigned int num_pipes, dml_prog_idx, dml_phantom_prog_idx, dc_pipe_index; + struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + + struct dml2_per_plane_programming *pln_prog = NULL; + struct 
dml2_plane_mcache_configuration_descriptor *mcache_config = NULL; + struct prepare_mcache_programming_locals *l = &dml_ctx->v21.scratch.prepare_mcache_locals; + + if (context->stream_count == 0) { + return; + } + + memset(&l->build_mcache_programming_params, 0, sizeof(struct dml2_build_mcache_programming_in_out)); + l->build_mcache_programming_params.dml2_instance = dml_ctx->v21.dml_init.dml2_instance; + + /* phantom's start after main planes */ + dml_phantom_prog_idx = dml_ctx->v21.mode_programming.programming->display_config.num_planes; + + /* Build mcache programming parameters per plane per pipe */ + for (dml_prog_idx = 0; dml_prog_idx < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_prog_idx++) { + pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx]; + + mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_prog_idx]; + memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor)); + mcache_config->plane_descriptor = pln_prog->plane_descriptor; + mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx]; + mcache_config->num_pipes = pln_prog->num_dpps_required; + l->build_mcache_programming_params.num_configurations++; + + if (pln_prog->num_dpps_required == 0) { + continue; + } + + num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx); + + if (num_pipes <= 0 || + dc_main_pipes[0]->stream == NULL || + dc_main_pipes[0]->plane_state == NULL) + continue; + + /* get config for each pipe */ + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + ASSERT(dc_main_pipes[dc_pipe_index]); + dml21_get_pipe_mcache_config(context, dc_main_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]); + } + + /* get config for each phantom pipe */ + if (pln_prog->phantom_plane.valid) { + mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_phantom_prog_idx]; + memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor)); + mcache_config->plane_descriptor = pln_prog->plane_descriptor; + mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx]; + mcache_config->num_pipes = pln_prog->num_dpps_required; + l->build_mcache_programming_params.num_configurations++; + + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + ASSERT(dc_phantom_pipes[dc_pipe_index]); + dml21_get_pipe_mcache_config(context, dc_phantom_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]); + } + + /* increment phantom index */ + dml_phantom_prog_idx++; + } + } + + /* Call to generate mcache programming per plane per pipe for the given display configuration */ + dml2_build_mcache_programming(&l->build_mcache_programming_params); + + /* get per plane per pipe mcache programming */ + for (dml_prog_idx = 0; dml_prog_idx < dml_ctx->v21.mode_programming.programming->display_config.num_planes; dml_prog_idx++) { + pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx]; + + num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx); + + if (num_pipes <= 0 || + dc_main_pipes[0]->stream == NULL || + dc_main_pipes[0]->plane_state == NULL) + continue; + + /* get config for each pipe */ + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + 
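/*
 * Copy the register set produced by dml2_build_mcache_programming() for this
 * plane/pipe pair into the DC pipe, but only when a set was actually
 * generated for it (the per_plane_pipe_mcache_regs pointer may be NULL).
 */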
ASSERT(dc_main_pipes[dc_pipe_index]); + if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index]) { + memcpy(&dc_main_pipes[dc_pipe_index]->mcache_regs, + l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index], + sizeof(struct dml2_hubp_pipe_mcache_regs)); + } + } + + /* get config for each phantom pipe */ + if (pln_prog->phantom_plane.valid) { + for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) { + ASSERT(dc_phantom_pipes[dc_pipe_index]); + if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index]) { + memcpy(&dc_phantom_pipes[dc_pipe_index]->mcache_regs, + l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index], + sizeof(struct dml2_hubp_pipe_mcache_regs)); + } + } + /* increment phantom index */ + dml_phantom_prog_idx++; + } + } +} + +void dml21_copy(struct dml2_context *dst_dml_ctx, + struct dml2_context *src_dml_ctx) +{ + /* Preserve references to internals */ + struct dml2_instance *dst_dml2_instance = dst_dml_ctx->v21.dml_init.dml2_instance; + struct dml2_display_cfg_programming *dst_dml2_programming = dst_dml_ctx->v21.mode_programming.programming; + + /* Copy context */ + memcpy(dst_dml_ctx, src_dml_ctx, sizeof(struct dml2_context)); + + /* Copy Internals */ + memcpy(dst_dml2_instance, src_dml_ctx->v21.dml_init.dml2_instance, sizeof(struct dml2_instance)); + memcpy(dst_dml2_programming, src_dml_ctx->v21.mode_programming.programming, sizeof(struct dml2_display_cfg_programming)); + + /* Restore references to internals */ + dst_dml_ctx->v21.dml_init.dml2_instance = dst_dml2_instance; + + dst_dml_ctx->v21.mode_support.dml2_instance = dst_dml2_instance; + dst_dml_ctx->v21.mode_programming.dml2_instance = dst_dml2_instance; + + dst_dml_ctx->v21.mode_support.display_config = &dst_dml_ctx->v21.display_config; + dst_dml_ctx->v21.mode_programming.display_config = dst_dml_ctx->v21.mode_support.display_config; + + dst_dml_ctx->v21.mode_programming.programming = dst_dml2_programming; + + /* need to initialize copied instance for internal references to be correct */ + dml2_initialize_instance(&dst_dml_ctx->v21.dml_init); +} + +bool dml21_create_copy(struct dml2_context **dst_dml_ctx, + struct dml2_context *src_dml_ctx) +{ + /* Allocate memory for initializing DML21 instance */ + if (!dml21_allocate_memory(dst_dml_ctx)) + return false; + + dml21_copy(*dst_dml_ctx, src_dml_ctx); + + return true; +} + +void dml21_reinit(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) +{ + dml21_init(in_dc, dml_ctx, config); +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h new file mode 100644 index 000000000000..6708f7117fbd --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef _DML21_WRAPPER_H_ +#define _DML21_WRAPPER_H_ + +#include "os_types.h" +#include "dml_top_soc_parameter_types.h" + +struct dc; +struct dc_state; +struct dml2_configuration_options; +struct dml2_context; + +/** + * dml2_create - Creates dml21_context. + * @in_dc: dc. + * @dml2: Created dml21 context. + * @config: dml21 configuration options. + * + * Create of DML21 is done as part of dc_state creation. + * DML21 IP, SOC and STATES are initialized at + * creation time. 
+ * + * Return: True if dml2 is successfully created, false otherwise. + */ +bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config); +void dml21_copy(struct dml2_context *dst_dml_ctx, + struct dml2_context *src_dml_ctx); +bool dml21_create_copy(struct dml2_context **dst_dml_ctx, + struct dml2_context *src_dml_ctx); +void dml21_reinit(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config); + +/** + * dml21_validate - Determines if a display configuration is supported or not. + * @in_dc: dc. + * @context: dc_state to be validated. + * @fast_validate: Fast validate will not populate context.res_ctx. + * + * Based on fast_validate option internally would call: + * + * -dml21_mode_check_and_programming - for non fast_validate option + * Calculates if dc_state can be supported on the input display + * configuration. If supported, generates the necessary HW + * programming for the new dc_state. + * + * -dml21_check_mode_support - for fast_validate option + * Calculates if dc_state can be supported for the input display + * config. + + * Context: Two threads may not invoke this function concurrently unless they reference + * separate dc_states for validation. + * Return: True if mode is supported, false otherwise. + */ +bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx, bool fast_validate); + +/* Prepare hubp mcache_regs for hubp mcache ID and split coordinate programming */ +void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); + +/* Structure for inputting external SOCBB and DCNIP values for tool based debugging. */ +struct socbb_ip_params_external { + struct dml2_ip_capabilities ip_params; + struct dml2_soc_bb soc_bb; +}; +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h new file mode 100644 index 000000000000..521f77b8ac44 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h @@ -0,0 +1,401 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DML_DML_DCN3_SOC_BB__ +#define __DML_DML_DCN3_SOC_BB__ + +#include "dml_top_soc_parameter_types.h" + +static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = { + .derate_table = { + .system_active_urgent = { + .dram_derate_percent_pixel = 22, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 76, + .dcfclk_derate_percent = 100, + }, + .system_active_average = { + .dram_derate_percent_pixel = 17, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 57, + .dcfclk_derate_percent = 75, + }, + .dcn_mall_prefetch_urgent = { + .dram_derate_percent_pixel = 22, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 76, + .dcfclk_derate_percent = 100, + }, + .dcn_mall_prefetch_average = { + .dram_derate_percent_pixel = 17, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 57, + .dcfclk_derate_percent = 75, + }, + .system_idle_average = { + .dram_derate_percent_pixel = 17, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 57, + .dcfclk_derate_percent = 100, + }, + }, + .writeback = { + .base_latency_us = 12, + .scaling_factor_us = 0, + .scaling_factor_mhz = 0, + }, + .qos_params = { + .dcn4 = { + .df_qos_response_time_fclk_cycles = 300, + .max_round_trip_to_furthest_cs_fclk_cycles = 350, + .mall_overhead_fclk_cycles = 50, + .meta_trip_adder_fclk_cycles = 36, + .average_transport_distance_fclk_cycles = 257, + .umc_urgent_ramp_latency_margin = 50, + .umc_max_latency_margin = 30, + .umc_average_latency_margin = 20, + .fabric_max_transport_latency_margin = 20, + .fabric_average_transport_latency_margin = 10, + + .per_uclk_dpm_params = { + { + .minimum_uclk_khz = 97, + .urgent_ramp_uclk_cycles = 472, + .trip_to_memory_uclk_cycles = 827, + .meta_trip_to_memory_uclk_cycles = 827, + .maximum_latency_when_urgent_uclk_cycles = 72, + .average_latency_when_urgent_uclk_cycles = 61, + .maximum_latency_when_non_urgent_uclk_cycles = 827, + .average_latency_when_non_urgent_uclk_cycles = 118, + }, + { + .minimum_uclk_khz = 435, + .urgent_ramp_uclk_cycles = 546, + .trip_to_memory_uclk_cycles = 848, + .meta_trip_to_memory_uclk_cycles = 848, + .maximum_latency_when_urgent_uclk_cycles = 146, + .average_latency_when_urgent_uclk_cycles = 90, + .maximum_latency_when_non_urgent_uclk_cycles = 848, + .average_latency_when_non_urgent_uclk_cycles = 135, + }, + { + .minimum_uclk_khz = 731, + .urgent_ramp_uclk_cycles = 632, + .trip_to_memory_uclk_cycles = 874, + .meta_trip_to_memory_uclk_cycles = 874, + .maximum_latency_when_urgent_uclk_cycles = 232, + .average_latency_when_urgent_uclk_cycles = 124, + .maximum_latency_when_non_urgent_uclk_cycles = 874, + .average_latency_when_non_urgent_uclk_cycles = 155, + }, + { + .minimum_uclk_khz = 1187, + .urgent_ramp_uclk_cycles = 716, + .trip_to_memory_uclk_cycles = 902, + .meta_trip_to_memory_uclk_cycles = 902, + .maximum_latency_when_urgent_uclk_cycles = 316, + .average_latency_when_urgent_uclk_cycles = 160, + .maximum_latency_when_non_urgent_uclk_cycles = 902, + .average_latency_when_non_urgent_uclk_cycles = 177, + }, + }, + }, + }, + .qos_type = dml2_qos_param_type_dcn4, +}; + +static const struct dml2_soc_bb dml2_socbb_dcn31 = { + .clk_table = { + .uclk = { + .clk_values_khz = {97000, 435000, 731000, 1187000}, + .num_clk_values = 4, + }, + .fclk = { + .clk_values_khz = {300000, 2500000}, + .num_clk_values = 
2, + }, + .dcfclk = { + .clk_values_khz = {200000, 1800000}, + .num_clk_values = 2, + }, + .dispclk = { + .clk_values_khz = {100000, 2000000}, + .num_clk_values = 2, + }, + .dppclk = { + .clk_values_khz = {100000, 2000000}, + .num_clk_values = 2, + }, + .dtbclk = { + .clk_values_khz = {100000, 2000000}, + .num_clk_values = 2, + }, + .phyclk = { + .clk_values_khz = {810000, 810000}, + .num_clk_values = 2, + }, + .socclk = { + .clk_values_khz = {300000, 1600000}, + .num_clk_values = 2, + }, + .dscclk = { + .clk_values_khz = {666667, 666667}, + .num_clk_values = 2, + }, + .phyclk_d18 = { + .clk_values_khz = {625000, 625000}, + .num_clk_values = 2, + }, + .phyclk_d32 = { + .clk_values_khz = {2000000, 2000000}, + .num_clk_values = 2, + }, + .dram_config = { + .channel_width_bytes = 2, + .channel_count = 16, + .transactions_per_clock = 16, + }, + }, + + .qos_parameters = { + .derate_table = { + .system_active_urgent = { + .dram_derate_percent_pixel = 22, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 76, + .dcfclk_derate_percent = 100, + }, + .system_active_average = { + .dram_derate_percent_pixel = 17, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 57, + .dcfclk_derate_percent = 75, + }, + .dcn_mall_prefetch_urgent = { + .dram_derate_percent_pixel = 22, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 76, + .dcfclk_derate_percent = 100, + }, + .dcn_mall_prefetch_average = { + .dram_derate_percent_pixel = 17, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 57, + .dcfclk_derate_percent = 75, + }, + .system_idle_average = { + .dram_derate_percent_pixel = 17, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 57, + .dcfclk_derate_percent = 100, + }, + }, + .writeback = { + .base_latency_us = 0, + .scaling_factor_us = 0, + .scaling_factor_mhz = 0, + }, + .qos_params = { + .dcn4 = { + .df_qos_response_time_fclk_cycles = 300, + .max_round_trip_to_furthest_cs_fclk_cycles = 350, + .mall_overhead_fclk_cycles = 50, + .meta_trip_adder_fclk_cycles = 36, + .average_transport_distance_fclk_cycles = 260, + .umc_urgent_ramp_latency_margin = 50, + .umc_max_latency_margin = 30, + .umc_average_latency_margin = 20, + .fabric_max_transport_latency_margin = 20, + .fabric_average_transport_latency_margin = 10, + + .per_uclk_dpm_params = { + { + // State 1 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 472, + .trip_to_memory_uclk_cycles = 827, + .meta_trip_to_memory_uclk_cycles = 827, + .maximum_latency_when_urgent_uclk_cycles = 72, + .average_latency_when_urgent_uclk_cycles = 72, + .maximum_latency_when_non_urgent_uclk_cycles = 827, + .average_latency_when_non_urgent_uclk_cycles = 117, + }, + { + // State 2 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 546, + .trip_to_memory_uclk_cycles = 848, + .meta_trip_to_memory_uclk_cycles = 848, + .maximum_latency_when_urgent_uclk_cycles = 146, + .average_latency_when_urgent_uclk_cycles = 146, + .maximum_latency_when_non_urgent_uclk_cycles = 848, + .average_latency_when_non_urgent_uclk_cycles = 133, + }, + { + // State 3 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 564, + .trip_to_memory_uclk_cycles = 853, + .meta_trip_to_memory_uclk_cycles = 853, + .maximum_latency_when_urgent_uclk_cycles = 164, + .average_latency_when_urgent_uclk_cycles = 164, + .maximum_latency_when_non_urgent_uclk_cycles = 853, + 
.average_latency_when_non_urgent_uclk_cycles = 136, + }, + { + // State 4 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 613, + .trip_to_memory_uclk_cycles = 869, + .meta_trip_to_memory_uclk_cycles = 869, + .maximum_latency_when_urgent_uclk_cycles = 213, + .average_latency_when_urgent_uclk_cycles = 213, + .maximum_latency_when_non_urgent_uclk_cycles = 869, + .average_latency_when_non_urgent_uclk_cycles = 149, + }, + { + // State 5 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 632, + .trip_to_memory_uclk_cycles = 874, + .meta_trip_to_memory_uclk_cycles = 874, + .maximum_latency_when_urgent_uclk_cycles = 232, + .average_latency_when_urgent_uclk_cycles = 232, + .maximum_latency_when_non_urgent_uclk_cycles = 874, + .average_latency_when_non_urgent_uclk_cycles = 153, + }, + { + // State 6 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 665, + .trip_to_memory_uclk_cycles = 885, + .meta_trip_to_memory_uclk_cycles = 885, + .maximum_latency_when_urgent_uclk_cycles = 265, + .average_latency_when_urgent_uclk_cycles = 265, + .maximum_latency_when_non_urgent_uclk_cycles = 885, + .average_latency_when_non_urgent_uclk_cycles = 161, + }, + { + // State 7 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 689, + .trip_to_memory_uclk_cycles = 895, + .meta_trip_to_memory_uclk_cycles = 895, + .maximum_latency_when_urgent_uclk_cycles = 289, + .average_latency_when_urgent_uclk_cycles = 289, + .maximum_latency_when_non_urgent_uclk_cycles = 895, + .average_latency_when_non_urgent_uclk_cycles = 167, + }, + { + // State 8 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 716, + .trip_to_memory_uclk_cycles = 902, + .meta_trip_to_memory_uclk_cycles = 902, + .maximum_latency_when_urgent_uclk_cycles = 316, + .average_latency_when_urgent_uclk_cycles = 316, + .maximum_latency_when_non_urgent_uclk_cycles = 902, + .average_latency_when_non_urgent_uclk_cycles = 174, + }, + }, + }, + }, + .qos_type = dml2_qos_param_type_dcn4, + }, + + .power_management_parameters = { + .dram_clk_change_blackout_us = 400, + .fclk_change_blackout_us = 0, + .g7_ppt_blackout_us = 0, + .stutter_enter_plus_exit_latency_us = 50, + .stutter_exit_latency_us = 43, + .z8_stutter_enter_plus_exit_latency_us = 0, + .z8_stutter_exit_latency_us = 0, + }, + + .vmin_limit = { + .dispclk_khz = 600 * 1000, + }, + + .dprefclk_mhz = 700, + .xtalclk_mhz = 100, + .pcie_refclk_mhz = 100, + .dchub_refclk_mhz = 50, + .mall_allocated_for_dcn_mbytes = 64, + .max_outstanding_reqs = 512, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .return_bus_width_bytes = 64, + .hostvm_min_page_size_kbytes = 0, + .gpuvm_min_page_size_kbytes = 256, + .phy_downspread_percent = 0, + .dcn_downspread_percent = 0, + .dispclk_dppclk_vco_speed_mhz = 4500, + .do_urgent_latency_adjustment = 0, + .mem_word_bytes = 32, + .num_dcc_mcaches = 8, + .mcache_size_bytes = 2048, + .mcache_line_size_bytes = 32, + .max_fclk_for_uclk_dpm_khz = 1250 * 1000, +}; + +static const struct dml2_ip_capabilities dml2_dcn31_max_ip_caps = { + .pipe_count = 4, + .otg_count = 4, + .num_dsc = 4, + .max_num_dp2p0_streams = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_dp2p0_outputs = 4, + .rob_buffer_size_kbytes = 192, + .config_return_buffer_size_in_kbytes = 1152, + .meta_fifo_size_in_kentries = 22, + .compressed_buffer_segment_size_in_kbytes = 64, + .subvp_drr_scheduling_margin_us = 100, + .subvp_prefetch_end_to_mall_start_us = 15, + .subvp_fw_processing_delay = 15, + + .fams2 = { + .max_allow_delay_us = 100 * 1000, + .scheduling_delay_us = 50, + .vertical_interrupt_ack_delay_us = 18, + 
.allow_programming_delay_us = 18, + .min_allow_width_us = 20, + .subvp_df_throttle_delay_us = 100, + .subvp_programming_delay_us = 18, + .subvp_prefetch_to_mall_delay_us = 18, + .drr_programming_delay_us = 18, + }, +}; + +#endif /* __DML_DML_DCN3_SOC_BB__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h new file mode 100644 index 000000000000..cb7a210e435a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML_DML_DCN4_SOC_BB__ +#define __DML_DML_DCN4_SOC_BB__ + +#include "dml_top_soc_parameter_types.h" + +static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = { + .derate_table = { + .system_active_urgent = { + .dram_derate_percent_pixel = 22, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 76, + .dcfclk_derate_percent = 100, + }, + .system_active_average = { + .dram_derate_percent_pixel = 17, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 57, + .dcfclk_derate_percent = 75, + }, + .dcn_mall_prefetch_urgent = { + .dram_derate_percent_pixel = 40, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 83, + .dcfclk_derate_percent = 100, + }, + .dcn_mall_prefetch_average = { + .dram_derate_percent_pixel = 33, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 62, + .dcfclk_derate_percent = 83, + }, + .system_idle_average = { + .dram_derate_percent_pixel = 70, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 83, + .dcfclk_derate_percent = 100, + }, + }, + .writeback = { + .base_latency_us = 12, + .scaling_factor_us = 0, + .scaling_factor_mhz = 0, + }, + .qos_params = { + .dcn4 = { + .df_qos_response_time_fclk_cycles = 300, + .max_round_trip_to_furthest_cs_fclk_cycles = 350, + .mall_overhead_fclk_cycles = 50, + .meta_trip_adder_fclk_cycles = 36, + .average_transport_distance_fclk_cycles = 257, + .umc_urgent_ramp_latency_margin = 50, + .umc_max_latency_margin = 30, + .umc_average_latency_margin = 20, + .fabric_max_transport_latency_margin = 20, + .fabric_average_transport_latency_margin = 10, + + .per_uclk_dpm_params = { + { + .minimum_uclk_khz = 97 * 1000, + .urgent_ramp_uclk_cycles = 472, + .trip_to_memory_uclk_cycles = 827, + .meta_trip_to_memory_uclk_cycles = 827, + .maximum_latency_when_urgent_uclk_cycles = 72, + .average_latency_when_urgent_uclk_cycles = 61, + .maximum_latency_when_non_urgent_uclk_cycles = 827, + .average_latency_when_non_urgent_uclk_cycles = 118, + }, + }, + }, + }, + .qos_type = dml2_qos_param_type_dcn4, +}; + +static const struct dml2_soc_bb dml2_socbb_dcn401 = { + .clk_table = { + .uclk = { + .clk_values_khz = {97000}, + .num_clk_values = 1, + }, + .fclk = { + .clk_values_khz = {300000, 2500000}, + .num_clk_values = 2, + }, + .dcfclk = { + .clk_values_khz = {200000, 1564000}, + .num_clk_values = 2, + }, + .dispclk = { + .clk_values_khz = {100000, 2000000}, + .num_clk_values = 2, + }, + .dppclk = { + .clk_values_khz = {100000, 2000000}, + .num_clk_values = 2, + }, + .dtbclk = { + .clk_values_khz = {100000, 1564000}, + .num_clk_values = 2, + }, + .phyclk = { + .clk_values_khz = {810000, 810000}, + .num_clk_values = 2, + }, + .socclk = { + 
.clk_values_khz = {300000, 1200000}, + .num_clk_values = 2, + }, + .dscclk = { + .clk_values_khz = {666667, 666667}, + .num_clk_values = 2, + }, + .phyclk_d18 = { + .clk_values_khz = {667000, 667000}, + .num_clk_values = 2, + }, + .phyclk_d32 = { + .clk_values_khz = {2000000, 2000000}, + .num_clk_values = 2, + }, + .dram_config = { + .channel_width_bytes = 2, + .channel_count = 16, + .transactions_per_clock = 16, + }, + }, + + .qos_parameters = { + .derate_table = { + .system_active_urgent = { + .dram_derate_percent_pixel = 22, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 76, + .dcfclk_derate_percent = 100, + }, + .system_active_average = { + .dram_derate_percent_pixel = 15, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 57, + .dcfclk_derate_percent = 75, + }, + .dcn_mall_prefetch_urgent = { + .dram_derate_percent_pixel = 40, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 83, + .dcfclk_derate_percent = 100, + }, + .dcn_mall_prefetch_average = { + .dram_derate_percent_pixel = 30, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 62, + .dcfclk_derate_percent = 83, + }, + .system_idle_average = { + .dram_derate_percent_pixel = 70, + .dram_derate_percent_vm = 0, + .dram_derate_percent_pixel_and_vm = 0, + .fclk_derate_percent = 83, + .dcfclk_derate_percent = 100, + }, + }, + .writeback = { + .base_latency_us = 0, + .scaling_factor_us = 0, + .scaling_factor_mhz = 0, + }, + .qos_params = { + .dcn4 = { + .df_qos_response_time_fclk_cycles = 300, + .max_round_trip_to_furthest_cs_fclk_cycles = 350, + .mall_overhead_fclk_cycles = 50, + .meta_trip_adder_fclk_cycles = 36, + .average_transport_distance_fclk_cycles = 260, + .umc_urgent_ramp_latency_margin = 50, + .umc_max_latency_margin = 30, + .umc_average_latency_margin = 20, + .fabric_max_transport_latency_margin = 20, + .fabric_average_transport_latency_margin = 10, + + .per_uclk_dpm_params = { + { + // State 1 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 472, + .trip_to_memory_uclk_cycles = 827, + .meta_trip_to_memory_uclk_cycles = 827, + .maximum_latency_when_urgent_uclk_cycles = 72, + .average_latency_when_urgent_uclk_cycles = 72, + .maximum_latency_when_non_urgent_uclk_cycles = 827, + .average_latency_when_non_urgent_uclk_cycles = 117, + }, + { + // State 2 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 546, + .trip_to_memory_uclk_cycles = 848, + .meta_trip_to_memory_uclk_cycles = 848, + .maximum_latency_when_urgent_uclk_cycles = 146, + .average_latency_when_urgent_uclk_cycles = 146, + .maximum_latency_when_non_urgent_uclk_cycles = 848, + .average_latency_when_non_urgent_uclk_cycles = 133, + }, + { + // State 3 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 564, + .trip_to_memory_uclk_cycles = 853, + .meta_trip_to_memory_uclk_cycles = 853, + .maximum_latency_when_urgent_uclk_cycles = 164, + .average_latency_when_urgent_uclk_cycles = 164, + .maximum_latency_when_non_urgent_uclk_cycles = 853, + .average_latency_when_non_urgent_uclk_cycles = 136, + }, + { + // State 4 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 613, + .trip_to_memory_uclk_cycles = 869, + .meta_trip_to_memory_uclk_cycles = 869, + .maximum_latency_when_urgent_uclk_cycles = 213, + .average_latency_when_urgent_uclk_cycles = 213, + .maximum_latency_when_non_urgent_uclk_cycles = 869, + .average_latency_when_non_urgent_uclk_cycles = 149, + }, + { + // State 5 + 
.minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 632, + .trip_to_memory_uclk_cycles = 874, + .meta_trip_to_memory_uclk_cycles = 874, + .maximum_latency_when_urgent_uclk_cycles = 232, + .average_latency_when_urgent_uclk_cycles = 232, + .maximum_latency_when_non_urgent_uclk_cycles = 874, + .average_latency_when_non_urgent_uclk_cycles = 153, + }, + { + // State 6 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 665, + .trip_to_memory_uclk_cycles = 885, + .meta_trip_to_memory_uclk_cycles = 885, + .maximum_latency_when_urgent_uclk_cycles = 265, + .average_latency_when_urgent_uclk_cycles = 265, + .maximum_latency_when_non_urgent_uclk_cycles = 885, + .average_latency_when_non_urgent_uclk_cycles = 161, + }, + { + // State 7 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 689, + .trip_to_memory_uclk_cycles = 895, + .meta_trip_to_memory_uclk_cycles = 895, + .maximum_latency_when_urgent_uclk_cycles = 289, + .average_latency_when_urgent_uclk_cycles = 289, + .maximum_latency_when_non_urgent_uclk_cycles = 895, + .average_latency_when_non_urgent_uclk_cycles = 167, + }, + { + // State 8 + .minimum_uclk_khz = 0, + .urgent_ramp_uclk_cycles = 716, + .trip_to_memory_uclk_cycles = 902, + .meta_trip_to_memory_uclk_cycles = 902, + .maximum_latency_when_urgent_uclk_cycles = 316, + .average_latency_when_urgent_uclk_cycles = 316, + .maximum_latency_when_non_urgent_uclk_cycles = 902, + .average_latency_when_non_urgent_uclk_cycles = 174, + }, + }, + }, + }, + .qos_type = dml2_qos_param_type_dcn4, + }, + + .power_management_parameters = { + .dram_clk_change_blackout_us = 400, + .fclk_change_blackout_us = 0, + .g7_ppt_blackout_us = 0, + .stutter_enter_plus_exit_latency_us = 21, + .stutter_exit_latency_us = 16, + .z8_stutter_enter_plus_exit_latency_us = 0, + .z8_stutter_exit_latency_us = 0, + }, + + .vmin_limit = { + .dispclk_khz = 600 * 1000, + }, + + .dprefclk_mhz = 700, + .xtalclk_mhz = 100, + .pcie_refclk_mhz = 100, + .dchub_refclk_mhz = 50, + .mall_allocated_for_dcn_mbytes = 64, + .max_outstanding_reqs = 512, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .return_bus_width_bytes = 64, + .hostvm_min_page_size_kbytes = 0, + .gpuvm_min_page_size_kbytes = 256, + .phy_downspread_percent = 0, + .dcn_downspread_percent = 0, + .dispclk_dppclk_vco_speed_mhz = 4500, + .do_urgent_latency_adjustment = 0, + .mem_word_bytes = 32, + .num_dcc_mcaches = 8, + .mcache_size_bytes = 2048, + .mcache_line_size_bytes = 32, + .max_fclk_for_uclk_dpm_khz = 1250 * 1000, +}; + +static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { + .pipe_count = 4, + .otg_count = 4, + .num_dsc = 4, + .max_num_dp2p0_streams = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_dp2p0_outputs = 4, + .rob_buffer_size_kbytes = 192, + .config_return_buffer_size_in_kbytes = 1344, + .meta_fifo_size_in_kentries = 22, + .compressed_buffer_segment_size_in_kbytes = 64, + .subvp_drr_scheduling_margin_us = 100, + .subvp_prefetch_end_to_mall_start_us = 15, + .subvp_fw_processing_delay = 15, + .max_vactive_det_fill_delay_us = 400, + + .fams2 = { + .max_allow_delay_us = 100 * 1000, + .scheduling_delay_us = 50, + .vertical_interrupt_ack_delay_us = 18, + .allow_programming_delay_us = 18, + .min_allow_width_us = 20, + .subvp_df_throttle_delay_us = 100, + .subvp_programming_delay_us = 18, + .subvp_prefetch_to_mall_delay_us = 18, + .drr_programming_delay_us = 18, + }, +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml2_external_lib_deps.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml2_external_lib_deps.h new file mode 
100644 index 000000000000..281d7ad230d8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml2_external_lib_deps.h @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_EXTERNAL_LIB_DEPS__ +#define __DML2_EXTERNAL_LIB_DEPS__ + +#include "os_types.h" + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h new file mode 100644 index 000000000000..a25f4e5977cf --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML_TOP_H__ +#define __DML_TOP_H__ + +#include "dml_top_types.h" + +/* + * Top Level Interface for DML2 + */ + +/* + * Returns the size of the DML instance for the caller to allocate + */ +unsigned int dml2_get_instance_size_bytes(void); + +/* + * Initializes the DML instance (i.e. with configuration, soc BB, IP params, etc...) + */ +bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out); + +/* + * Determines if the input mode is supported (boolean) on the SoC at all. Does not return + * information on how the mode should be programmed. + */ +bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out); + +/* + * Determines the full (optimized) programming for the input mode. Returns minimum + * clocks as well as dchub register programming values for all pipes, and additional meta + * such as ODM or MPCC combine factors. + */ +bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out); + +/* + * Determines the correct per pipe mcache register programming for a valid mode. + * The mcache allocation must have been calculated (successfully) in a previous + * call to dml2_build_mode_programming. + * The actual hubp viewport dimensions must be what the actual registers will be + * programmed to (i.e. based on scaler setup). + */ +bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h new file mode 100644 index 000000000000..8247289ce7d3 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc.
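For orientation only, a minimal bring-up sketch of the dml_top.h entry points above might look like the following. This is not part of the patch; the allocator, the chosen project ID, and the use of the DCN4.01 bounding-box/IP-caps tables are assumptions for illustration.

/* Hypothetical bring-up sketch; assumes <linux/slab.h>, dml_top.h and the
 * DCN4.01 tables from dcn4_soc_bb.h are in scope. Error handling is trimmed.
 */
static struct dml2_instance *example_dml2_create(void)
{
	struct dml2_initialize_instance_in_out init = { 0 };
	struct dml2_instance *dml;

	/* The core reports how much memory the caller must allocate. */
	dml = kzalloc(dml2_get_instance_size_bytes(), GFP_KERNEL);
	if (!dml)
		return NULL;

	init.dml2_instance = dml;
	init.options.project_id = dml2_project_dcn4x_stage2;	/* assumed project */
	init.soc_bb = dml2_socbb_dcn401;			/* SoC bounding box */
	init.ip_caps = dml2_dcn401_max_ip_caps;			/* IP capabilities */

	if (!dml2_initialize_instance(&init)) {
		kfree(dml);
		return NULL;
	}

	return dml;
}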
+ + +#ifndef __dml2_TOP_DCHUB_REGISTERS_H__ +#define __dml2_TOP_DCHUB_REGISTERS_H__ + +#include "dml2_external_lib_deps.h" +// These types are uint32_t as they represent actual calculated register values for HW + +struct dml2_display_dlg_regs { + uint32_t refcyc_h_blank_end; + uint32_t dlg_vblank_end; + uint32_t min_dst_y_next_start; + uint32_t refcyc_per_htotal; + uint32_t refcyc_x_after_scaler; + uint32_t dst_y_after_scaler; + uint32_t dst_y_prefetch; + uint32_t dst_y_per_vm_vblank; + uint32_t dst_y_per_row_vblank; + uint32_t dst_y_per_vm_flip; + uint32_t dst_y_per_row_flip; + uint32_t ref_freq_to_pix_freq; + uint32_t vratio_prefetch; + uint32_t vratio_prefetch_c; + uint32_t refcyc_per_tdlut_group; + uint32_t refcyc_per_pte_group_vblank_l; + uint32_t refcyc_per_pte_group_vblank_c; + uint32_t refcyc_per_pte_group_flip_l; + uint32_t refcyc_per_pte_group_flip_c; + uint32_t dst_y_per_pte_row_nom_l; + uint32_t dst_y_per_pte_row_nom_c; + uint32_t refcyc_per_pte_group_nom_l; + uint32_t refcyc_per_pte_group_nom_c; + uint32_t refcyc_per_line_delivery_pre_l; + uint32_t refcyc_per_line_delivery_pre_c; + uint32_t refcyc_per_line_delivery_l; + uint32_t refcyc_per_line_delivery_c; + uint32_t refcyc_per_vm_group_vblank; + uint32_t refcyc_per_vm_group_flip; + uint32_t refcyc_per_vm_req_vblank; + uint32_t refcyc_per_vm_req_flip; + uint32_t dst_y_offset_cur0; + uint32_t chunk_hdl_adjust_cur0; + uint32_t vready_after_vcount0; + uint32_t dst_y_delta_drq_limit; + uint32_t refcyc_per_vm_dmdata; + uint32_t dmdata_dl_delta; + + // MRQ + uint32_t refcyc_per_meta_chunk_vblank_l; + uint32_t refcyc_per_meta_chunk_vblank_c; + uint32_t refcyc_per_meta_chunk_flip_l; + uint32_t refcyc_per_meta_chunk_flip_c; + uint32_t dst_y_per_meta_row_nom_l; + uint32_t dst_y_per_meta_row_nom_c; + uint32_t refcyc_per_meta_chunk_nom_l; + uint32_t refcyc_per_meta_chunk_nom_c; +}; + +struct dml2_display_ttu_regs { + uint32_t qos_level_low_wm; + uint32_t qos_level_high_wm; + uint32_t min_ttu_vblank; + uint32_t qos_level_flip; + uint32_t refcyc_per_req_delivery_l; + uint32_t refcyc_per_req_delivery_c; + uint32_t refcyc_per_req_delivery_cur0; + uint32_t refcyc_per_req_delivery_pre_l; + uint32_t refcyc_per_req_delivery_pre_c; + uint32_t refcyc_per_req_delivery_pre_cur0; + uint32_t qos_level_fixed_l; + uint32_t qos_level_fixed_c; + uint32_t qos_level_fixed_cur0; + uint32_t qos_ramp_disable_l; + uint32_t qos_ramp_disable_c; + uint32_t qos_ramp_disable_cur0; +}; + +struct dml2_display_arb_regs { + uint32_t max_req_outstanding; + uint32_t min_req_outstanding; + uint32_t sat_level_us; + uint32_t hvm_max_qos_commit_threshold; + uint32_t hvm_min_req_outstand_commit_threshold; + uint32_t compbuf_reserved_space_kbytes; + uint32_t compbuf_size; + uint32_t sdpif_request_rate_limit; + uint32_t allow_sdpif_rate_limit_when_cstate_req; + uint32_t dcfclk_deep_sleep_hysteresis; +}; + +struct dml2_cursor_dlg_regs{ + uint32_t dst_x_offset; // CURSOR0_DST_X_OFFSET + uint32_t dst_y_offset; // CURSOR0_DST_Y_OFFSET + uint32_t chunk_hdl_adjust; // CURSOR0_CHUNK_HDL_ADJUST + + uint32_t qos_level_fixed; + uint32_t qos_ramp_disable; +}; + +struct dml2_display_plane_rq_regs { + uint32_t chunk_size; + uint32_t min_chunk_size; + uint32_t dpte_group_size; + uint32_t mpte_group_size; + uint32_t swath_height; + uint32_t pte_row_height_linear; + + // MRQ + uint32_t meta_chunk_size; + uint32_t min_meta_chunk_size; +}; + +struct dml2_display_rq_regs { + struct dml2_display_plane_rq_regs rq_regs_l; + struct dml2_display_plane_rq_regs rq_regs_c; + uint32_t drq_expansion_mode; + 
uint32_t prq_expansion_mode; + uint32_t crq_expansion_mode; + uint32_t plane1_base_address; + uint32_t unbounded_request_enabled; + + // MRQ + uint32_t mrq_expansion_mode; +}; + +struct dml2_display_mcache_regs { + uint32_t mcache_id_first; + uint32_t mcache_id_second; + uint32_t split_location; +}; + +struct dml2_hubp_pipe_mcache_regs { + struct { + struct dml2_display_mcache_regs p0; + struct dml2_display_mcache_regs p1; + } main; + struct { + struct dml2_display_mcache_regs p0; + struct dml2_display_mcache_regs p1; + } mall; +}; + +struct dml2_dchub_per_pipe_register_set { + struct dml2_display_rq_regs rq_regs; + struct dml2_display_ttu_regs ttu_regs; + struct dml2_display_dlg_regs dlg_regs; + + uint32_t det_size; +}; + +struct dml2_dchub_watermark_regs { + /* watermarks */ + uint32_t urgent; + uint32_t sr_enter; + uint32_t sr_exit; + uint32_t uclk_pstate; + uint32_t fclk_pstate; + uint32_t temp_read_or_ppt; + uint32_t usr; + /* qos */ + uint32_t refcyc_per_trip_to_mem; + uint32_t refcyc_per_meta_trip_to_mem; + uint32_t frac_urg_bw_flip; + uint32_t frac_urg_bw_nom; + uint32_t frac_urg_bw_mall; +}; + +enum dml2_dchub_watermark_reg_set_index { + DML2_DCHUB_WATERMARK_SET_A = 0, + DML2_DCHUB_WATERMARK_SET_B = 1, + DML2_DCHUB_WATERMARK_SET_C = 2, + DML2_DCHUB_WATERMARK_SET_D = 3, + DML2_DCHUB_WATERMARK_SET_NUM = 4, +}; + +struct dml2_dchub_global_register_set { + struct dml2_display_arb_regs arb_regs; + struct dml2_dchub_watermark_regs wm_regs[DML2_DCHUB_WATERMARK_SET_NUM]; + unsigned int num_watermark_sets; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h new file mode 100644 index 000000000000..fbf3e77f3d38 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -0,0 +1,502 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ + +#ifndef __DML_TOP_DISPLAY_CFG_TYPES_H__ +#define __DML_TOP_DISPLAY_CFG_TYPES_H__ + +#include "dml2_external_lib_deps.h" + +#define DML2_MAX_PLANES 8 +#define DML2_MAX_DCN_PIPES 8 +#define DML2_MAX_MCACHES 8 // assume plane is going to be supported by a max of 8 mcaches + +enum dml2_swizzle_mode { + dml2_sw_linear, + dml2_sw_256b_2d, + dml2_sw_4kb_2d, + dml2_sw_64kb_2d, + dml2_sw_256kb_2d, + + dml2_gfx11_sw_linear, + dml2_gfx11_sw_64kb_d, + dml2_gfx11_sw_64kb_d_t, + dml2_gfx11_sw_64kb_d_x, + dml2_gfx11_sw_64kb_r_x, + dml2_gfx11_sw_256kb_d_x, + dml2_gfx11_sw_256kb_r_x +}; + +enum dml2_source_format_class { + dml2_444_8 = 0, + dml2_444_16 = 1, + dml2_444_32 = 2, + dml2_444_64 = 3, + dml2_420_8 = 4, + dml2_420_10 = 5, + dml2_420_12 = 6, + dml2_rgbe_alpha = 9, + dml2_rgbe = 10, + dml2_mono_8 = 11, + dml2_mono_16 = 12 +}; + +enum dml2_rotation_angle { + dml2_rotation_0 = 0, + dml2_rotation_90 = 1, + dml2_rotation_180 = 2, + dml2_rotation_270 = 3 +}; + +enum dml2_output_format_class { + dml2_444 = 0, + dml2_s422 = 1, + dml2_n422 = 2, + dml2_420 = 3 +}; + +enum dml2_output_encoder_class { + dml2_dp = 0, + dml2_edp = 1, + dml2_dp2p0 = 2, + dml2_hdmi = 3, + dml2_hdmifrl = 4, + dml2_none = 5 +}; + +enum dml2_output_link_dp_rate { + dml2_dp_rate_na = 0, + dml2_dp_rate_hbr = 1, + dml2_dp_rate_hbr2 = 2, + dml2_dp_rate_hbr3 = 3, + dml2_dp_rate_uhbr10 = 4, + dml2_dp_rate_uhbr13p5 = 5, + dml2_dp_rate_uhbr20 = 6 +}; + +enum dml2_uclk_pstate_change_strategy { + dml2_uclk_pstate_change_strategy_auto = 0, + dml2_uclk_pstate_change_strategy_force_vactive = 1, + dml2_uclk_pstate_change_strategy_force_vblank = 2, + dml2_uclk_pstate_change_strategy_force_drr = 3, + dml2_uclk_pstate_change_strategy_force_mall_svp = 4, + dml2_uclk_pstate_change_strategy_force_mall_full_frame = 5, +}; + +enum dml2_svp_mode_override { + dml2_svp_mode_override_auto = 0, + dml2_svp_mode_override_main_pipe = 1, + dml2_svp_mode_override_phantom_pipe = 2, //does not need to be defined explicitly, main overrides result in implicit phantom additions + dml2_svp_mode_override_phantom_pipe_no_data_return = 3, + dml2_svp_mode_override_imall = 4 +}; + +enum dml2_refresh_from_mall_mode_override { + dml2_refresh_from_mall_mode_override_auto = 0, + dml2_refresh_from_mall_mode_override_force_disable = 1, + dml2_refresh_from_mall_mode_override_force_enable = 2 +}; + +enum dml2_odm_mode { + dml2_odm_mode_auto = 0, + dml2_odm_mode_bypass, + dml2_odm_mode_combine_2to1, + dml2_odm_mode_combine_3to1, + dml2_odm_mode_combine_4to1, + dml2_odm_mode_split_1to2, + dml2_odm_mode_mso_1to2, + dml2_odm_mode_mso_1to4 +}; + +enum dml2_scaling_transform { + dml2_scaling_transform_explicit = 0, + dml2_scaling_transform_fullscreen, + dml2_scaling_transform_aspect_ratio, + dml2_scaling_transform_centered +}; + +enum dml2_dsc_enable_option { + dml2_dsc_disable = 0, + dml2_dsc_enable = 1, + dml2_dsc_enable_if_necessary = 2 +}; + +enum dml2_pstate_support_method { + dml2_pstate_method_uninitialized, + dml2_pstate_method_not_supported, + dml2_pstate_method_vactive, + dml2_pstate_method_vblank, + dml2_pstate_method_svp, + dml2_pstate_method_drr +}; + +enum dml2_tdlut_addressing_mode { + dml2_tdlut_sw_linear = 0, + dml2_tdlut_simple_linear = 1 +}; + +enum dml2_tdlut_width_mode { + dml2_tdlut_width_17_cube = 0, + dml2_tdlut_width_33_cube = 1 +}; + +enum dml2_twait_budgeting_setting { + dml2_twait_budgeting_setting_ignore = 0,// Ignore this budget in twait + + dml2_twait_budgeting_setting_if_needed, // Budget for it only if needed + //(i.e. 
UCLK/FCLK DPM cannot be supported in active) + + dml2_twait_budgeting_setting_try, // Budget for it as long as there is an SoC state that + // can support it +}; + +struct dml2_get_cursor_dlg_reg{ + unsigned int cursor_x_position; + unsigned int cursor_hotspot_x; + unsigned int cursor_primary_offset; + unsigned int cursor_secondary_offset; + bool cursor_stereo_en; + bool cursor_2x_magnify; + double hratio; + double pixel_rate_mhz; + double dlg_refclk_mhz; +}; + +/// @brief Surface Parameters +struct dml2_surface_cfg { + enum dml2_swizzle_mode tiling; + + struct { + unsigned long pitch; + unsigned long width; + unsigned long height; + } plane0; + + + struct { + unsigned long pitch; + unsigned long width; + unsigned long height; + } plane1; + + struct { + bool enable; + struct { + unsigned long pitch; + } plane0; + struct { + unsigned long pitch; + } plane1; + + struct { + double dcc_rate_plane0; + double dcc_rate_plane1; + double fraction_of_zero_size_request_plane0; + double fraction_of_zero_size_request_plane1; + } informative; + } dcc; +}; + + +struct dml2_composition_cfg { + enum dml2_rotation_angle rotation_angle; + bool mirrored; + enum dml2_scaling_transform scaling_transform; + bool rect_out_height_spans_vactive; + + struct { + bool stationary; + struct { + unsigned long width; + unsigned long height; + unsigned long x_start; + unsigned long y_start; + } plane0; + + struct { + unsigned long width; + unsigned long height; + unsigned long x_start; + unsigned long y_start; + } plane1; + } viewport; + + struct { + bool enabled; + struct { + double h_ratio; + double v_ratio; + unsigned int h_taps; + unsigned int v_taps; + } plane0; + + struct { + double h_ratio; + double v_ratio; + unsigned int h_taps; + unsigned int v_taps; + } plane1; + + unsigned long rect_out_width; + } scaler_info; +}; + +struct dml2_timing_cfg { + unsigned long h_total; + unsigned long v_total; + unsigned long h_blank_end; + unsigned long v_blank_end; + unsigned long h_front_porch; + unsigned long v_front_porch; + unsigned long h_sync_width; + unsigned long pixel_clock_khz; + unsigned long h_active; + unsigned long v_active; + unsigned int bpc; //FIXME: review with Jun + struct { + enum dml2_dsc_enable_option enable; + unsigned int dsc_compressed_bpp_x16; + struct { + // for dv to specify num dsc slices to use + unsigned int num_slices; + } overrides; + } dsc; + bool interlaced; + struct { + /* static */ + bool enabled; + unsigned long min_refresh_uhz; + unsigned int max_instant_vtotal_delta; + /* dynamic */ + bool disallowed; + bool drr_active_variable; + bool drr_active_fixed; + } drr_config; + unsigned long vblank_nom; +}; + +struct dml2_link_output_cfg { + enum dml2_output_format_class output_format; + enum dml2_output_encoder_class output_encoder; + unsigned int output_dp_lane_count; + enum dml2_output_link_dp_rate output_dp_link_rate; + unsigned long audio_sample_rate; + unsigned long audio_sample_layout; + bool output_disabled; // The stream does not go to a backend for output to a physical + //connector (e.g. writeback only, phantom pipe) goes to writeback + bool validate_output; // Do not validate the link configuration for this display stream. 
+}; + +struct dml2_writeback_cfg { + bool enable; + enum dml2_source_format_class pixel_format; + unsigned int active_writebacks_per_surface; + + struct { + bool enabled; + unsigned long input_width; + unsigned long input_height; + unsigned long output_width; + unsigned long output_height; + unsigned long v_taps; + unsigned long h_taps; + double h_ratio; + double v_ratio; + } scaling_info; +}; + +struct dml2_plane_parameters { + unsigned int stream_index; // Identifies which plane will be composed + + enum dml2_source_format_class pixel_format; + /* + * The surface and composition structures use + * the terms plane0 and plane1. These planes + * are expected to hold the following data based + * on the pixel format. + * + * RGB or YUV Non-Planar Types: + * dml2_444_8 + * dml2_444_16 + * dml2_444_32 + * dml2_444_64 + * dml2_rgbe + * + * plane0 = argb or rgbe + * plane1 = not used + * + * YUV Planar-Types: + * dml2_420_8 + * dml2_420_10 + * dml2_420_12 + * + * plane0 = luma + * plane1 = chroma + * + * RGB Planar Types: + * dml2_rgbe_alpha + * + * plane0 = rgbe + * plane1 = alpha + * + * Mono Non-Planar Types: + * dml2_mono_8 + * dml2_mono_16 + * + * plane0 = luma + * plane1 = not used + */ + + struct dml2_surface_cfg surface; + struct dml2_composition_cfg composition; + + struct { + bool enable; + unsigned long lines_before_active_required; + unsigned long transmitted_bytes; + } dynamic_meta_data; + + struct { + unsigned int num_cursors; + unsigned long cursor_width; + unsigned long cursor_bpp; + } cursor; + + // For TDLUT, SW would assume TDLUT is setup and enable all the time and + // budget for worst case addressing/width mode + struct { + bool setup_for_tdlut; + enum dml2_tdlut_addressing_mode tdlut_addressing_mode; + enum dml2_tdlut_width_mode tdlut_width_mode; + bool tdlut_mpc_width_flag; + } tdlut; + + bool immediate_flip; + + struct { + // Logical overrides to power management policies (usually) + enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy; + enum dml2_refresh_from_mall_mode_override refresh_from_mall; + unsigned int det_size_override_kb; + unsigned int mpcc_combine_factor; + long reserved_vblank_time_ns; // 0 = no override, -ve = no reserved time, +ve = explicit reserved time + unsigned int gpuvm_min_page_size_kbytes; + + enum dml2_svp_mode_override legacy_svp_config; //TODO remove in favor of svp_config + + struct { + // HW specific overrides, there's almost no reason to mess with these + // generally used for debugging or simulation + bool force_one_row_for_frame; + struct { + bool enable; + bool value; + } force_pte_buffer_mode; + double dppclk_mhz; + } hw; + } overrides; +}; + +struct dml2_stream_parameters { + struct dml2_timing_cfg timing; + struct dml2_link_output_cfg output; + struct dml2_writeback_cfg writeback; + + struct { + enum dml2_odm_mode odm_mode; + bool disable_dynamic_odm; + bool disable_subvp; + int minimum_vblank_idle_requirement_us; + bool minimize_active_latency_hiding; + + struct { + struct { + enum dml2_twait_budgeting_setting uclk_pstate; + enum dml2_twait_budgeting_setting fclk_pstate; + enum dml2_twait_budgeting_setting stutter_enter_exit; + } twait_budgeting; + } hw; + } overrides; +}; + +struct dml2_display_cfg { + bool gpuvm_enable; + bool hostvm_enable; + + // Allocate DET proportionally between streams based on pixel rate + // and then allocate proportionally between planes. 
+ bool minimize_det_reallocation; + + unsigned int gpuvm_max_page_table_levels; + unsigned int hostvm_max_page_table_levels; + + struct dml2_plane_parameters plane_descriptors[DML2_MAX_PLANES]; + struct dml2_stream_parameters stream_descriptors[DML2_MAX_PLANES]; + + unsigned int num_planes; + unsigned int num_streams; + + struct { + struct { + // HW specific overrides, there's almost no reason to mess with these + // generally used for debugging or simulation + struct { + bool enable; + bool value; + } force_unbounded_requesting; + + struct { + bool enable; + bool value; + } force_nom_det_size_kbytes; + bool mode_support_check_disable; + bool mcache_admissibility_check_disable; + bool surface_viewport_size_check_disable; + double dlg_ref_clk_mhz; + double dispclk_mhz; + double dcfclk_mhz; + bool optimize_tdlut_scheduling; // TBD: for DV, will set this to 1, to ensure tdlut schedule is calculated based on address/width mode + } hw; + + struct { + bool uclk_pstate_change_disable; + bool fclk_pstate_change_disable; + bool g6_temp_read_pstate_disable; + bool g7_ppt_pstate_disable; + } power_management; + + bool enhanced_prefetch_schedule_acceleration; + bool dcc_programming_assumes_scan_direction_unknown; + bool synchronize_timings; + bool synchronize_ddr_displays_for_uclk_pstate_change; + bool max_outstanding_when_urgent_expected_disable; + bool enable_subvp_implicit_pmo; //enables PMO to switch pipe uclk strategy to subvp, and generate phantom programming + unsigned int best_effort_min_active_latency_hiding_us; + } overrides; +}; + +struct dml2_pipe_configuration_descriptor { + struct { + unsigned int viewport_x_start; + unsigned int viewport_width; + } plane0; + + struct { + unsigned int viewport_x_start; + unsigned int viewport_width; + } plane1; + + bool plane1_enabled; + bool imall_enabled; +}; + +struct dml2_plane_mcache_configuration_descriptor { + const struct dml2_plane_parameters *plane_descriptor; + const struct dml2_mcache_surface_allocation *mcache_allocation; + + struct dml2_pipe_configuration_descriptor pipe_configurations[DML2_MAX_DCN_PIPES]; + char num_pipes; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h new file mode 100644 index 000000000000..2f444f448770 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML_TOP_POLICY_TYPES_H__ +#define __DML_TOP_POLICY_TYPES_H__ + +struct dml2_policy_parameters { + unsigned long odm_combine_dispclk_threshold_khz; + unsigned int max_immediate_flip_latency; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h new file mode 100644 index 000000000000..7d6461ca09bf --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
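To make the stream/plane relationship in dml2_display_cfg concrete, here is a purely illustrative, hand-written sketch of a one-stream, one-plane configuration. It is not taken from this patch, and all numeric values (timing, sizes, pitch) are placeholders.

/* Minimal single-plane configuration sketch (values are placeholders). */
static void example_fill_display_cfg(struct dml2_display_cfg *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->num_streams = 1;
	cfg->num_planes = 1;

	/* One 4k-class timing driving an HDMI FRL output. */
	cfg->stream_descriptors[0].timing.h_active = 3840;
	cfg->stream_descriptors[0].timing.v_active = 2160;
	cfg->stream_descriptors[0].timing.h_total = 4400;
	cfg->stream_descriptors[0].timing.v_total = 2250;
	cfg->stream_descriptors[0].timing.pixel_clock_khz = 594000;
	cfg->stream_descriptors[0].output.output_encoder = dml2_hdmifrl;

	/* One ARGB plane composed onto stream 0, unscaled. */
	cfg->plane_descriptors[0].stream_index = 0;
	cfg->plane_descriptors[0].pixel_format = dml2_444_32;
	cfg->plane_descriptors[0].surface.plane0.width = 3840;
	cfg->plane_descriptors[0].surface.plane0.height = 2160;
	cfg->plane_descriptors[0].surface.plane0.pitch = 3840;
	cfg->plane_descriptors[0].composition.viewport.plane0.width = 3840;
	cfg->plane_descriptors[0].composition.viewport.plane0.height = 2160;
}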
+ + +#ifndef __DML_TOP_SOC_PARAMETER_TYPES_H__ +#define __DML_TOP_SOC_PARAMETER_TYPES_H__ + +#include "dml2_external_lib_deps.h" + +#define DML_MAX_CLK_TABLE_SIZE 20 + +struct dml2_soc_derate_values { + unsigned int dram_derate_percent_pixel; + unsigned int dram_derate_percent_vm; + unsigned int dram_derate_percent_pixel_and_vm; + + unsigned int fclk_derate_percent; + unsigned int dcfclk_derate_percent; +}; + +struct dml2_soc_derates { + struct dml2_soc_derate_values system_active_urgent; + struct dml2_soc_derate_values system_active_average; + struct dml2_soc_derate_values dcn_mall_prefetch_urgent; + struct dml2_soc_derate_values dcn_mall_prefetch_average; + struct dml2_soc_derate_values system_idle_average; +}; + +struct dml2_dcn3_soc_qos_params { + struct { + unsigned int base_latency_us; + unsigned int base_latency_pixel_vm_us; + unsigned int base_latency_vm_us; + unsigned int scaling_factor_fclk_us; + unsigned int scaling_factor_mhz; + } urgent_latency_us; + + unsigned int loaded_round_trip_latency_fclk_cycles; + unsigned int urgent_out_of_order_return_per_channel_pixel_only_bytes; + unsigned int urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; + unsigned int urgent_out_of_order_return_per_channel_vm_only_bytes; +}; + +struct dml2_dcn4_uclk_dpm_dependent_qos_params { + unsigned long minimum_uclk_khz; + unsigned int urgent_ramp_uclk_cycles; + unsigned int trip_to_memory_uclk_cycles; + unsigned int meta_trip_to_memory_uclk_cycles; + unsigned int maximum_latency_when_urgent_uclk_cycles; + unsigned int average_latency_when_urgent_uclk_cycles; + unsigned int maximum_latency_when_non_urgent_uclk_cycles; + unsigned int average_latency_when_non_urgent_uclk_cycles; +}; + +struct dml2_dcn4_soc_qos_params { + unsigned int df_qos_response_time_fclk_cycles; + unsigned int max_round_trip_to_furthest_cs_fclk_cycles; + unsigned int mall_overhead_fclk_cycles; + unsigned int meta_trip_adder_fclk_cycles; + unsigned int average_transport_distance_fclk_cycles; + double umc_urgent_ramp_latency_margin; + double umc_max_latency_margin; + double umc_average_latency_margin; + double fabric_max_transport_latency_margin; + double fabric_average_transport_latency_margin; + struct dml2_dcn4_uclk_dpm_dependent_qos_params per_uclk_dpm_params[DML_MAX_CLK_TABLE_SIZE]; +}; + +enum dml2_qos_param_type { + dml2_qos_param_type_dcn3, + dml2_qos_param_type_dcn4 +}; + +struct dml2_soc_qos_parameters { + struct dml2_soc_derates derate_table; + struct { + unsigned int base_latency_us; + unsigned int scaling_factor_us; + unsigned int scaling_factor_mhz; + } writeback; + + union { + struct dml2_dcn3_soc_qos_params dcn3; + struct dml2_dcn4_soc_qos_params dcn4; + } qos_params; + + enum dml2_qos_param_type qos_type; +}; + +struct dml2_soc_power_management_parameters { + double dram_clk_change_blackout_us; + double dram_clk_change_read_only_us; + double dram_clk_change_write_only_us; + double fclk_change_blackout_us; + double g7_ppt_blackout_us; + double stutter_enter_plus_exit_latency_us; + double stutter_exit_latency_us; + double z8_stutter_enter_plus_exit_latency_us; + double z8_stutter_exit_latency_us; + double z8_min_idle_time; + double g6_temp_read_blackout_us[DML_MAX_CLK_TABLE_SIZE]; +}; + +struct dml2_clk_table { + unsigned long clk_values_khz[DML_MAX_CLK_TABLE_SIZE]; + unsigned char num_clk_values; +}; + +struct dml2_dram_params { + unsigned int channel_width_bytes; + unsigned int channel_count; + unsigned int transactions_per_clock; +}; + +struct dml2_soc_state_table { + struct dml2_clk_table uclk; + struct 
dml2_clk_table fclk; + struct dml2_clk_table dcfclk; + struct dml2_clk_table dispclk; + struct dml2_clk_table dppclk; + struct dml2_clk_table dtbclk; + struct dml2_clk_table phyclk; + struct dml2_clk_table socclk; + struct dml2_clk_table dscclk; + struct dml2_clk_table phyclk_d18; + struct dml2_clk_table phyclk_d32; + + struct dml2_dram_params dram_config; +}; + +struct dml2_soc_vmin_clock_limits { + unsigned long dispclk_khz; +}; + +struct dml2_soc_bb { + struct dml2_soc_state_table clk_table; + struct dml2_soc_qos_parameters qos_parameters; + struct dml2_soc_power_management_parameters power_management_parameters; + struct dml2_soc_vmin_clock_limits vmin_limit; + + unsigned int dprefclk_mhz; + unsigned int xtalclk_mhz; + unsigned int pcie_refclk_mhz; + unsigned int dchub_refclk_mhz; + unsigned int mall_allocated_for_dcn_mbytes; + unsigned int max_outstanding_reqs; + unsigned long fabric_datapath_to_dcn_data_return_bytes; + unsigned long return_bus_width_bytes; + unsigned long hostvm_min_page_size_kbytes; + unsigned long gpuvm_min_page_size_kbytes; + double phy_downspread_percent; + double dcn_downspread_percent; + double dispclk_dppclk_vco_speed_mhz; + bool do_urgent_latency_adjustment; + unsigned int mem_word_bytes; + unsigned int num_dcc_mcaches; + unsigned int mcache_size_bytes; + unsigned int mcache_line_size_bytes; + unsigned long max_fclk_for_uclk_dpm_khz; +}; + +struct dml2_ip_capabilities { + unsigned int pipe_count; + unsigned int otg_count; + unsigned int num_dsc; + unsigned int max_num_dp2p0_streams; + unsigned int max_num_hdmi_frl_outputs; + unsigned int max_num_dp2p0_outputs; + unsigned int rob_buffer_size_kbytes; + unsigned int config_return_buffer_size_in_kbytes; + unsigned int meta_fifo_size_in_kentries; + unsigned int compressed_buffer_segment_size_in_kbytes; + unsigned int subvp_drr_scheduling_margin_us; + unsigned int subvp_prefetch_end_to_mall_start_us; + unsigned int subvp_fw_processing_delay; + unsigned int max_vactive_det_fill_delay_us; + + /* FAMS2 delays */ + struct { + unsigned int max_allow_delay_us; + unsigned int scheduling_delay_us; + unsigned int vertical_interrupt_ack_delay_us; // delay to acknowledge vline int + unsigned int allow_programming_delay_us; // time requires to program allow + unsigned int min_allow_width_us; + unsigned int subvp_df_throttle_delay_us; + unsigned int subvp_programming_delay_us; + unsigned int subvp_prefetch_to_mall_delay_us; + unsigned int drr_programming_delay_us; + } fams2; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h new file mode 100644 index 000000000000..7dcc9cef2b58 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -0,0 +1,718 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
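One detail worth noting in the header above: qos_params is a union discriminated by qos_type, so consumers are expected to check the type before reading either variant. A hypothetical accessor, purely for illustration, could look like this:

/* Illustrative accessor only: select the union variant via qos_type. */
static unsigned int example_urgent_ramp_cycles(const struct dml2_soc_qos_parameters *qos,
					       unsigned int uclk_dpm_level)
{
	if (qos->qos_type == dml2_qos_param_type_dcn4 &&
	    uclk_dpm_level < DML_MAX_CLK_TABLE_SIZE)
		return qos->qos_params.dcn4.per_uclk_dpm_params[uclk_dpm_level].urgent_ramp_uclk_cycles;

	/* The dml2_qos_param_type_dcn3 variant describes QoS via latency terms instead. */
	return 0;
}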
+ +#ifndef __DML_TOP_TYPES_H__ +#define __DML_TOP_TYPES_H__ + +#include "dml_top_types.h" +#include "dml_top_display_cfg_types.h" +#include "dml_top_soc_parameter_types.h" +#include "dml_top_policy_types.h" +#include "dml_top_dchub_registers.h" + +#include "dmub_cmd.h" + +struct dml2_instance; + +enum dml2_status { + dml2_success = 0, + dml2_error_generic = 1 +}; + +enum dml2_project_id { + dml2_project_invalid = 0, + dml2_project_dcn4x_stage1 = 1, + dml2_project_dcn4x_stage2 = 2, + dml2_project_dcn4x_stage2_auto_drr_svp = 3, +}; + +enum dml2_dram_clock_change_support { + dml2_dram_clock_change_vactive = 0, + dml2_dram_clock_change_vblank = 1, + dml2_dram_clock_change_vblank_and_vactive = 2, + dml2_dram_clock_change_drr = 3, + dml2_dram_clock_change_mall_svp = 4, + dml2_dram_clock_change_mall_full_frame = 6, + dml2_dram_clock_change_unsupported = 7 +}; + +enum dml2_fclock_change_support { + dml2_fclock_change_vactive = 0, + dml2_fclock_change_vblank = 1, + dml2_fclock_change_unsupported = 2 +}; + +enum dml2_output_type_and_rate__type { + dml2_output_type_unknown = 0, + dml2_output_type_dp = 1, + dml2_output_type_edp = 2, + dml2_output_type_dp2p0 = 3, + dml2_output_type_hdmi = 4, + dml2_output_type_hdmifrl = 5 +}; + +enum dml2_output_type_and_rate__rate { + dml2_output_rate_unknown = 0, + dml2_output_rate_dp_rate_hbr = 1, + dml2_output_rate_dp_rate_hbr2 = 2, + dml2_output_rate_dp_rate_hbr3 = 3, + dml2_output_rate_dp_rate_uhbr10 = 4, + dml2_output_rate_dp_rate_uhbr13p5 = 5, + dml2_output_rate_dp_rate_uhbr20 = 6, + dml2_output_rate_hdmi_rate_3x3 = 7, + dml2_output_rate_hdmi_rate_6x3 = 8, + dml2_output_rate_hdmi_rate_6x4 = 9, + dml2_output_rate_hdmi_rate_8x4 = 10, + dml2_output_rate_hdmi_rate_10x4 = 11, + dml2_output_rate_hdmi_rate_12x4 = 12 +}; + +struct dml2_pmo_options { + bool disable_vblank; + bool disable_svp; + bool disable_drr_var; + bool disable_drr_fixed; + bool disable_drr_var_when_var_active; + bool disable_fams2; + bool disable_dyn_odm; + bool disable_dyn_odm_for_multi_stream; + bool disable_dyn_odm_for_stream_with_svp; +}; + +struct dml2_options { + enum dml2_project_id project_id; + struct dml2_pmo_options pmo_options; +}; + +struct dml2_initialize_instance_in_out { + struct dml2_instance *dml2_instance; + struct dml2_options options; + struct dml2_soc_bb soc_bb; + struct dml2_ip_capabilities ip_caps; + + struct { + void *explicit_ip_bb; + unsigned int explicit_ip_bb_size; + } overrides; +}; + +struct dml2_reset_instance_in_out { + struct dml2_instance *dml2_instance; +}; + +struct dml2_check_mode_supported_in_out { + /* + * Inputs + */ + struct dml2_instance *dml2_instance; + const struct dml2_display_cfg *display_config; + + /* + * Outputs + */ + bool is_supported; +}; + +struct dml2_mcache_surface_allocation { + bool valid; + /* + * For iMALL, dedicated mall mcaches are required (sharing of last + * slice possible), for legacy phantom or phantom without return + * the only mall mcaches need to be valid. + */ + bool requires_dedicated_mall_mcache; + + unsigned int num_mcaches_plane0; + unsigned int num_mcaches_plane1; + /* + * A plane is divided into vertical slices of mcaches, + * which wrap on the surface width. + * + * For example, if the surface width is 7680, and split into + * three slices of equal width, the boundary array would contain + * [2560, 5120, 7680] + * + * The assignments are + * 0 = [0 .. 2559] + * 1 = [2560 .. 5119] + * 2 = [5120 .. 7679] + * 0 = [7680 .. 
INF] + * The final element implicitly is the same as the first, and + * at first seems invalid since it is never referenced (since) + * it is outside the surface. However, its useful when shifting + * (see below). + * + * For any given valid mcache assignment, a shifted version, wrapped + * on the surface width boundary is also assumed to be valid. + * + * For example, shifting [2560, 5120, 7680] by -50 results in + * [2510, 5170, 7630]. + * + * The assignments are now: + * 0 = [0 .. 2509] + * 1 = [2510 .. 5169] + * 2 = [5170 .. 7629] + * 0 = [7630 .. INF] + */ + int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1]; + int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1]; + + /* + * Shift grainularity is not necessarily 1 + */ + struct { + int p0; + int p1; + } shift_granularity; + + /* + * MCacheIDs have global scope in the SoC, and they are stored here. + * These IDs are generally not valid until all planes in a display + * configuration have had their mcache requirements calculated. + */ + int global_mcache_ids_plane0[DML2_MAX_MCACHES + 1]; + int global_mcache_ids_plane1[DML2_MAX_MCACHES + 1]; + int global_mcache_ids_mall_plane0[DML2_MAX_MCACHES + 1]; + int global_mcache_ids_mall_plane1[DML2_MAX_MCACHES + 1]; + + /* + * Generally, plane0/1 slices must use a disjoint set of caches + * but in some cases the final segement of the two planes can + * use the same cache. If plane0_plane1 is set, then this is + * allowed. + * + * Similarly, the caches allocated to MALL prefetcher are generally + * disjoint, but if mall_prefetch is set, then the final segment + * between the main and the mall pixel requestor can use the same + * cache. + * + * Note that both bits may be set at the same time. + */ + struct { + bool mall_comb_mcache_p0; + bool mall_comb_mcache_p1; + bool plane0_plane1; + } last_slice_sharing; + + struct { + int meta_row_bytes_plane0; + int meta_row_bytes_plane1; + } informative; +}; + +enum dml2_uclk_pstate_support_method { + dml2_uclk_pstate_support_method_not_supported = 0, + /* hw */ + dml2_uclk_pstate_support_method_vactive = 1, + dml2_uclk_pstate_support_method_vblank = 2, + dml2_uclk_pstate_support_method_reserved_hw = 5, + /* fw */ + dml2_uclk_pstate_support_method_fw_subvp_phantom = 6, + dml2_uclk_pstate_support_method_reserved_fw = 10, + /* fw w/drr */ + dml2_uclk_pstate_support_method_fw_vactive_drr = 11, + dml2_uclk_pstate_support_method_fw_vblank_drr = 12, + dml2_uclk_pstate_support_method_fw_subvp_phantom_drr = 13, + dml2_uclk_pstate_support_method_reserved_fw_drr_fixed = 20, + dml2_uclk_pstate_support_method_fw_drr = 21, + dml2_uclk_pstate_support_method_reserved_fw_drr_var = 22, + + dml2_uclk_pstate_support_method_count +}; + +struct dml2_per_plane_programming { + const struct dml2_plane_parameters *plane_descriptor; + + union { + struct { + unsigned long dppclk_khz; + } dcn4; + } min_clocks; + + struct dml2_mcache_surface_allocation mcache_allocation; + + // If a stream is using automatic or forced odm combine + // and the stream for this plane has num_odms_required > 1 + // num_dpps_required is always equal to num_odms_required for + // ALL planes of the stream + + // If a stream is using odm split, then this value is always 1 + unsigned int num_dpps_required; + + enum dml2_uclk_pstate_support_method uclk_pstate_support_method; + + // MALL size requirements for MALL SS and SubVP + unsigned int surface_size_mall_bytes; + unsigned int svp_size_mall_bytes; + + struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES]; + + struct { + bool valid; + struct 
dml2_plane_parameters descriptor; + struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES]; + } phantom_plane; +}; + +union dml2_global_sync_programming { + struct { + unsigned int vstartup_lines; + unsigned int vupdate_offset_pixels; + unsigned int vupdate_vupdate_width_pixels; + unsigned int vready_offset_pixels; + } dcn4; +}; + +struct dml2_per_stream_programming { + const struct dml2_stream_parameters *stream_descriptor; + + union { + struct { + unsigned long dscclk_khz; + unsigned long dtbclk_khz; + unsigned long phyclk_khz; + } dcn4; + } min_clocks; + + union dml2_global_sync_programming global_sync; + + unsigned int num_odms_required; + + enum dml2_uclk_pstate_support_method uclk_pstate_method; + + struct { + bool enabled; + struct dml2_stream_parameters descriptor; + union dml2_global_sync_programming global_sync; + } phantom_stream; + + struct dmub_fams2_stream_static_state fams2_params; +}; + +//----------------- +// Mode Support Information +//----------------- + +struct dml2_mode_support_info { + bool ModeIsSupported; //<brief Is the mode support any voltage and combine setting + bool ImmediateFlipSupport; //<brief Means mode support immediate flip at the max combine setting; determine in mode support and used in mode programming + // Mode Support Reason + bool WritebackLatencySupport; + bool ScaleRatioAndTapsSupport; + bool SourceFormatPixelAndScanSupport; + bool P2IWith420; + bool DSCOnlyIfNecessaryWithBPP; + bool DSC422NativeNotSupported; + bool LinkRateDoesNotMatchDPVersion; + bool LinkRateForMultistreamNotIndicated; + bool BPPForMultistreamNotIndicated; + bool MultistreamWithHDMIOreDP; + bool MSOOrODMSplitWithNonDPLink; + bool NotEnoughLanesForMSO; + bool NumberOfOTGSupport; + bool NumberOfHDMIFRLSupport; + bool NumberOfDP2p0Support; + bool WritebackScaleRatioAndTapsSupport; + bool CursorSupport; + bool PitchSupport; + bool ViewportExceedsSurface; + bool ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified; + bool ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe; + bool InvalidCombinationOfMALLUseForPStateAndStaticScreen; + bool InvalidCombinationOfMALLUseForPState; + bool ExceededMALLSize; + bool EnoughWritebackUnits; + bool ExceededMultistreamSlots; + bool NotEnoughDSCUnits; + bool NotEnoughDSCSlices; + bool PixelsPerLinePerDSCUnitSupport; + bool DSCCLKRequiredMoreThanSupported; + bool DTBCLKRequiredMoreThanSupported; + bool LinkCapacitySupport; + bool ROBSupport; + bool ROBUrgencyAvoidance; + bool OutstandingRequestsSupport; + bool OutstandingRequestsUrgencyAvoidance; + bool PTEBufferSizeNotExceeded; + bool DCCMetaBufferSizeNotExceeded; + bool TotalVerticalActiveBandwidthSupport; + bool VActiveBandwidthSupport; + enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + bool USRRetrainingSupport; + bool PrefetchSupported; + bool DynamicMetadataSupported; + bool VRatioInPrefetchSupported; + bool DISPCLK_DPPCLK_Support; + bool TotalAvailablePipesSupport; + bool ViewportSizeSupport; + bool ImmediateFlipSupportedForState; + double MaxTotalVerticalActiveAvailableBandwidth; + bool MPCCombineEnable[DML2_MAX_PLANES]; /// <brief Indicate if the MPC Combine enable in the given state and optimize mpc combine setting + enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; /// <brief ODM mode that is chosen in the mode check stage and will be used in mode programming stage + unsigned int DPPPerSurface[DML2_MAX_PLANES]; /// <brief How many DPPs are needed drive the surface to output. If MPCC or ODMC could be 2 or 4. 
+ bool DSCEnabled[DML2_MAX_PLANES]; /// <brief Indicate if the DSC is actually required; used in mode_programming + bool FECEnabled[DML2_MAX_PLANES]; /// <brief Indicate if the FEC is actually required + unsigned int NumberOfDSCSlices[DML2_MAX_PLANES]; /// <brief Indicate how many slices needed to support the given mode + double OutputBpp[DML2_MAX_PLANES]; + enum dml2_output_type_and_rate__type OutputType[DML2_MAX_PLANES]; + enum dml2_output_type_and_rate__rate OutputRate[DML2_MAX_PLANES]; + unsigned int AlignedYPitch[DML2_MAX_PLANES]; + unsigned int AlignedCPitch[DML2_MAX_PLANES]; + bool g6_temp_read_support; +}; // dml2_mode_support_info + +struct dml2_display_cfg_programming { + struct dml2_display_cfg display_config; + + union { + struct { + unsigned long dcfclk_khz; + unsigned long fclk_khz; + unsigned long uclk_khz; + unsigned long socclk_khz; + unsigned long dispclk_khz; + unsigned long dcfclk_deepsleep_khz; + unsigned long dpp_ref_khz; + } dcn3; + struct { + struct { + unsigned long uclk_khz; + unsigned long fclk_khz; + unsigned long dcfclk_khz; + } active; + struct { + unsigned long uclk_khz; + unsigned long fclk_khz; + unsigned long dcfclk_khz; + } idle; + struct { + unsigned long uclk_khz; + unsigned long fclk_khz; + unsigned long dcfclk_khz; + } svp_prefetch; + + unsigned long deepsleep_dcfclk_khz; + unsigned long dispclk_khz; + unsigned long dpprefclk_khz; + unsigned long dtbrefclk_khz; + unsigned long socclk_khz; + + struct { + uint32_t dispclk_did; + uint32_t dpprefclk_did; + uint32_t dtbrefclk_did; + } divider_ids; + } dcn4; + } min_clocks; + + bool uclk_pstate_supported; + bool fclk_pstate_supported; + + /* indicates this configuration requires FW to support */ + bool fams2_required; + + struct { + bool supported_in_blank; // Changing to configurations where this is false requires stutter to be disabled during the transition + } stutter; + + struct { + bool meets_eco; // Stutter cycles will meet Z8 ECO criteria + bool supported_in_blank; // Changing to configurations where this is false requires Z8 to be disabled during the transition + } z8_stutter; + + struct dml2_dchub_global_register_set global_regs; + + struct dml2_per_plane_programming plane_programming[DML2_MAX_PLANES]; + struct dml2_per_stream_programming stream_programming[DML2_MAX_PLANES]; + + // Don't access this structure directly, access it through plane_programming.pipe_regs + struct dml2_dchub_per_pipe_register_set pipe_regs[DML2_MAX_PLANES]; + + struct { + struct { + double urgent_us; + double writeback_urgent_us; + double writeback_pstate_us; + double writeback_fclk_pstate_us; + double cstate_exit_us; + double cstate_enter_plus_exit_us; + double z8_cstate_exit_us; + double z8_cstate_enter_plus_exit_us; + double pstate_change_us; + double fclk_pstate_change_us; + double usr_retraining_us; + double g6_temp_read_watermark_us; + } watermarks; + + struct { + unsigned int swath_width_plane0; + unsigned int swath_height_plane0; + unsigned int swath_height_plane1; + unsigned int dpte_row_height_plane0; + unsigned int dpte_row_height_plane1; + unsigned int meta_row_height_plane0; + unsigned int meta_row_height_plane1; + } plane_info[DML2_MAX_PLANES]; + + struct { + unsigned long long total_surface_size_in_mall_bytes; + unsigned int subviewport_lines_needed_in_mall[DML2_MAX_PLANES]; + } mall; + + struct { + double urgent_latency_us; // urgent ramp latency + double max_non_urgent_latency_us; + double max_urgent_latency_us; + double avg_non_urgent_latency_us; + double avg_urgent_latency_us; + double wm_memory_trip_us; 
+ double meta_trip_memory_us; + double fraction_of_urgent_bandwidth; // nom + double fraction_of_urgent_bandwidth_immediate_flip; + double fraction_of_urgent_bandwidth_mall; + double max_active_fclk_change_latency_supported; + unsigned int min_return_latency_in_dcfclk; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + double dram_vm_only_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + double dram_vm_only_bw_mbps; + } sys_active; + } urg_bw_available; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + } avg_bw_available; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + } non_urg_bw_required; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + } non_urg_bw_required_with_flip; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + + } urg_bw_required; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + } urg_bw_required_with_flip; + + struct { + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } svp_prefetch; + + struct { + double sdp_bw_mbps; + double dram_bw_mbps; + } sys_active; + } avg_bw_required; + } qos; + + struct { + unsigned long long det_size_in_kbytes[DML2_MAX_PLANES]; + unsigned long long DETBufferSizeY[DML2_MAX_PLANES]; + unsigned long long comp_buffer_size_kbytes; + bool UnboundedRequestEnabled; + unsigned int compbuf_reserved_space_64b; + } crb; + + struct { + unsigned int max_uncompressed_block_plane0; + unsigned int max_compressed_block_plane0; + unsigned int independent_block_plane0; + unsigned int max_uncompressed_block_plane1; + unsigned int max_compressed_block_plane1; + unsigned int independent_block_plane1; + } dcc_control[DML2_MAX_PLANES]; + + struct { + double stutter_efficiency; + double stutter_efficiency_with_vblank; + double stutter_num_bursts; + + struct { + double stutter_efficiency; + double stutter_efficiency_with_vblank; + double stutter_num_bursts; + double stutter_period; + + struct { + double stutter_efficiency; + double stutter_num_bursts; + double stutter_period; + } bestcase; + } z8; + } power_management; + + struct { + double min_ttu_vblank_us[DML2_MAX_PLANES]; + bool vready_at_or_after_vsync[DML2_MAX_PLANES]; + double min_dst_y_next_start[DML2_MAX_PLANES]; + bool cstate_max_cap_mode; + bool hw_debug5; + unsigned int dcfclk_deep_sleep_hysteresis; + unsigned int dst_x_after_scaler[DML2_MAX_PLANES]; + unsigned int dst_y_after_scaler[DML2_MAX_PLANES]; + unsigned int prefetch_source_lines_plane0[DML2_MAX_PLANES]; + unsigned int prefetch_source_lines_plane1[DML2_MAX_PLANES]; + bool ImmediateFlipSupportedForPipe[DML2_MAX_PLANES]; + bool UsesMALLForStaticScreen[DML2_MAX_PLANES]; + unsigned int CursorDstXOffset[DML2_MAX_PLANES]; + unsigned int CursorDstYOffset[DML2_MAX_PLANES]; + unsigned int CursorChunkHDLAdjust[DML2_MAX_PLANES]; + unsigned int dpte_group_bytes[DML2_MAX_PLANES]; + unsigned int vm_group_bytes[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeLuma[DML2_MAX_PLANES]; + double 
DisplayPipeRequestDeliveryTimeChroma[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeLumaPrefetch[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeChromaPrefetch[DML2_MAX_PLANES]; + double TimePerVMGroupVBlank[DML2_MAX_PLANES]; + double TimePerVMGroupFlip[DML2_MAX_PLANES]; + double TimePerVMRequestVBlank[DML2_MAX_PLANES]; + double TimePerVMRequestFlip[DML2_MAX_PLANES]; + double Tdmdl_vm[DML2_MAX_PLANES]; + double Tdmdl[DML2_MAX_PLANES]; + unsigned int VStartup[DML2_MAX_PLANES]; + unsigned int VUpdateOffsetPix[DML2_MAX_PLANES]; + unsigned int VUpdateWidthPix[DML2_MAX_PLANES]; + unsigned int VReadyOffsetPix[DML2_MAX_PLANES]; + + double DST_Y_PER_PTE_ROW_NOM_L[DML2_MAX_PLANES]; + double DST_Y_PER_PTE_ROW_NOM_C[DML2_MAX_PLANES]; + double time_per_pte_group_nom_luma[DML2_MAX_PLANES]; + double time_per_pte_group_nom_chroma[DML2_MAX_PLANES]; + double time_per_pte_group_vblank_luma[DML2_MAX_PLANES]; + double time_per_pte_group_vblank_chroma[DML2_MAX_PLANES]; + double time_per_pte_group_flip_luma[DML2_MAX_PLANES]; + double time_per_pte_group_flip_chroma[DML2_MAX_PLANES]; + double VRatioPrefetchY[DML2_MAX_PLANES]; + double VRatioPrefetchC[DML2_MAX_PLANES]; + double DestinationLinesForPrefetch[DML2_MAX_PLANES]; + double DestinationLinesToRequestVMInVBlank[DML2_MAX_PLANES]; + double DestinationLinesToRequestRowInVBlank[DML2_MAX_PLANES]; + double DestinationLinesToRequestVMInImmediateFlip[DML2_MAX_PLANES]; + double DestinationLinesToRequestRowInImmediateFlip[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeLuma[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeChroma[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeLumaPrefetch[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeChromaPrefetch[DML2_MAX_PLANES]; + + double WritebackAllowDRAMClockChangeEndPosition[DML2_MAX_PLANES]; + double WritebackAllowFCLKChangeEndPosition[DML2_MAX_PLANES]; + double DSCCLK_calculated[DML2_MAX_PLANES]; + unsigned int BIGK_FRAGMENT_SIZE[DML2_MAX_PLANES]; + bool PTE_BUFFER_MODE[DML2_MAX_PLANES]; + double DSCDelay[DML2_MAX_PLANES]; + double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES]; + unsigned int PrefetchMode[DML2_MAX_PLANES]; // LEGACY_ONLY + } misc; + + struct dml2_mode_support_info mode_support_info; + unsigned int voltage_level; // LEGACY_ONLY + + // For DV only + // This is what dml core calculated, only on the full_vp width and assume we have + // unlimited # of mcache + struct dml2_mcache_surface_allocation non_optimized_mcache_allocation[DML2_MAX_PLANES]; + + bool failed_mcache_validation; + bool failed_dpmm; + bool failed_mode_programming; + } informative; +}; + +struct dml2_build_mode_programming_in_out { + /* + * Inputs + */ + struct dml2_instance *dml2_instance; + const struct dml2_display_cfg *display_config; + + /* + * Outputs + */ + struct dml2_display_cfg_programming *programming; +}; + +struct dml2_build_mcache_programming_in_out { + /* + * Inputs + */ + struct dml2_instance *dml2_instance; + + struct dml2_plane_mcache_configuration_descriptor mcache_configurations[DML2_MAX_PLANES]; + char num_configurations; + + /* + * Outputs + */ + // per_plane_pipe_mcache_regs[i][j] refers to the proper programming for the j-th pipe of the + // i-th plane (from mcache_configurations) + struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; + + // It's not a good idea to reference this directly, better to use the pointer structure above instead + struct dml2_hubp_pipe_mcache_regs mcache_regs_set[DML2_MAX_DCN_PIPES]; +}; + 
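Illustrative sketch (not part of this patch): the comments in dml2_build_mcache_programming_in_out above describe the intended access pattern, where callers index per_plane_pipe_mcache_regs by plane and then by pipe instead of reading mcache_regs_set directly. A minimal consumer under that assumption might look like the code below; walk_mcache_regs and num_pipes_per_plane are hypothetical names introduced here for illustration only, and the real per-plane pipe count would come from the corresponding mcache configuration descriptor.

#include "dml_top_types.h" /* assumed header providing the structs shown above */

/* Hypothetical helper, not part of the patch: visit every valid per-pipe
 * mcache register set after the top-level build-mcache-programming entry
 * point (name inferred from the struct above) has filled in the pointer
 * table. num_pipes_per_plane is an assumed caller-supplied array. */
static void walk_mcache_regs(const struct dml2_build_mcache_programming_in_out *out,
		const unsigned int *num_pipes_per_plane)
{
	int plane, pipe;

	for (plane = 0; plane < out->num_configurations; plane++) {
		for (pipe = 0; pipe < (int)num_pipes_per_plane[plane]; pipe++) {
			const struct dml2_hubp_pipe_mcache_regs *regs =
					out->per_plane_pipe_mcache_regs[plane][pipe];

			if (!regs)
				continue;
			/* program the HUBP mcache registers for this pipe from *regs */
		}
	}
}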
+struct dml2_unit_test_in_out { + /* + * Inputs + */ + struct dml2_instance *dml2_instance; +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c new file mode 100644 index 000000000000..0bbd2f47cb79 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -0,0 +1,628 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_internal_shared_types.h" +#include "dml2_core_shared_types.h" +#include "dml2_core_dcn4.h" +#include "dml2_core_dcn4_calcs.h" +#include "dml2_debug.h" +#include "lib_float_math.h" + +struct dml2_core_ip_params core_dcn4_ip_caps_base = { + // Hardcoded values for DCN3x + .vblank_nom_default_us = 668, + .remote_iommu_outstanding_translations = 256, + .rob_buffer_size_kbytes = 128, + .config_return_buffer_size_in_kbytes = 1280, + .config_return_buffer_segment_size_in_kbytes = 64, + .compressed_buffer_segment_size_in_kbytes = 64, + .dpte_buffer_size_in_pte_reqs_luma = 68, + .dpte_buffer_size_in_pte_reqs_chroma = 36, + .pixel_chunk_size_kbytes = 8, + .alpha_pixel_chunk_size_kbytes = 4, + .min_pixel_chunk_size_bytes = 1024, + .writeback_chunk_size_kbytes = 8, + .line_buffer_size_bits = 1171920, + .max_line_buffer_lines = 32, + .writeback_interface_buffer_size_kbytes = 90, + //Number of pipes after DCN Pipe harvesting + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .dppclk_delay_subtotal = 47, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 28, + .dppclk_delay_cnvc_cursor = 6, + .cursor_buffer_size = 24, + .cursor_chunk_size = 2, + .dispclk_delay_subtotal = 125, + .max_inter_dcn_tile_repeaters = 8, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .writeback_line_buffer_buffer_size = 0, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 12, + .maximum_pixels_per_line_per_dsc_unit = 5760, + .dsc422_native_support = true, + .dcc_supported = true, + .ptoi_supported = false, + + .cursor_64bpp_support = true, + .dynamic_metadata_vm_enabled = false, + + .max_num_hdmi_frl_outputs = 1, + .max_num_dp2p0_outputs = 4, + .max_num_dp2p0_streams = 4, + .imall_supported = 1, + .max_flip_time_us = 80, + .words_per_channel = 16, + + .subvp_fw_processing_delay_us = 15, + .subvp_pstate_allow_width_us = 20, + .subvp_swath_height_margin_lines = 16, +}; + +static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *ip_caps, const struct dml2_core_ip_params *ip_params) +{ + ip_caps->pipe_count = ip_params->max_num_dpp; + ip_caps->otg_count = ip_params->max_num_otg; + ip_caps->num_dsc = ip_params->num_dsc; + ip_caps->max_num_dp2p0_streams = ip_params->max_num_dp2p0_streams; + ip_caps->max_num_dp2p0_outputs = ip_params->max_num_dp2p0_outputs; + ip_caps->rob_buffer_size_kbytes = ip_params->rob_buffer_size_kbytes; + ip_caps->config_return_buffer_size_in_kbytes = ip_params->config_return_buffer_size_in_kbytes; + ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries; + 
ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes; + + // FIXME_STAGE2: cleanup after adding all dv override to ip_caps + ip_caps->subvp_drr_scheduling_margin_us = 100; + ip_caps->subvp_prefetch_end_to_mall_start_us = 15; + ip_caps->subvp_fw_processing_delay = 16; + +} + +static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, const struct dml2_ip_capabilities *ip_caps) +{ + ip_params->max_num_dpp = ip_caps->pipe_count; + ip_params->max_num_otg = ip_caps->otg_count; + ip_params->num_dsc = ip_caps->num_dsc; + ip_params->max_num_dp2p0_streams = ip_caps->max_num_dp2p0_streams; + ip_params->max_num_dp2p0_outputs = ip_caps->max_num_dp2p0_outputs; + ip_params->rob_buffer_size_kbytes = ip_caps->rob_buffer_size_kbytes; + ip_params->config_return_buffer_size_in_kbytes = ip_caps->config_return_buffer_size_in_kbytes; + ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries; + ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes; +} + +bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out) +{ + struct dml2_core_instance *core = in_out->instance; + + if (!in_out->minimum_clock_table) + return false; + else + core->minimum_clock_table = in_out->minimum_clock_table; + + if (in_out->explicit_ip_bb && in_out->explicit_ip_bb_size > 0) { + memcpy(&core->clean_me_up.mode_lib.ip, in_out->explicit_ip_bb, in_out->explicit_ip_bb_size); + + // FIXME_STAGE2: + // DV still uses stage1 ip_param_st for each variant, need to patch the ip_caps with ip_param info + // Should move DV to use ip_caps but need move more overrides to ip_caps + patch_ip_caps_with_explicit_ip_params(in_out->ip_caps, in_out->explicit_ip_bb); + core->clean_me_up.mode_lib.ip.subvp_pstate_allow_width_us = core_dcn4_ip_caps_base.subvp_pstate_allow_width_us; + core->clean_me_up.mode_lib.ip.subvp_fw_processing_delay_us = core_dcn4_ip_caps_base.subvp_pstate_allow_width_us; + core->clean_me_up.mode_lib.ip.subvp_swath_height_margin_lines = core_dcn4_ip_caps_base.subvp_swath_height_margin_lines; + } else { + memcpy(&core->clean_me_up.mode_lib.ip, &core_dcn4_ip_caps_base, sizeof(struct dml2_core_ip_params)); + patch_ip_params_with_ip_caps(&core->clean_me_up.mode_lib.ip, in_out->ip_caps); + + core->clean_me_up.mode_lib.ip.imall_supported = false; + } + + memcpy(&core->clean_me_up.mode_lib.soc, in_out->soc_bb, sizeof(struct dml2_soc_bb)); + + return true; +} + +static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, + const struct dml2_implicit_svp_meta *meta) +{ + memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); + + phantom->timing.v_total = meta->v_total; + phantom->timing.v_active = meta->v_active; + phantom->timing.v_front_porch = meta->v_front_porch; + phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; + phantom->timing.dsc.enable = dml2_dsc_disable; + phantom->timing.drr_config.enabled = false; +} + +static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, + const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) +{ + memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); + + phantom->stream_index = phantom_stream_index; + phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; + 
phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; + phantom->composition.viewport.plane0.height = (long int unsigned) math_ceil2( + (double)phantom->composition.viewport.plane0.height * (double)phantom_stream->timing.v_active / (double)main_stream->timing.v_active, 16.0); + phantom->composition.viewport.plane1.height = (long int unsigned) math_ceil2( + (double)phantom->composition.viewport.plane1.height * (double)phantom_stream->timing.v_active / (double)main_stream->timing.v_active, 16.0); + phantom->immediate_flip = false; + phantom->dynamic_meta_data.enable = false; + phantom->cursor.num_cursors = 0; + phantom->cursor.cursor_width = 0; + phantom->tdlut.setup_for_tdlut = false; +} + +static void expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index; + const struct dml2_plane_parameters *main_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); + + if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) + return; + + /* disable unbounded requesting for all planes until stage 3 has been performed */ + if (!display_cfg->stage3.performed) { + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; + } + // Create the phantom streams + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; + scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; + + if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { + // Create the phantom stream + create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], + main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); + + // Associate this phantom stream to the main stream + scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams; + + // Increment num streams + svp_expanded_display_cfg->num_streams++; + } + } + + // Create the phantom planes + for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { + main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; + + if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { + main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; + create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], + main_plane, 
phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); + + // Associate this phantom plane to the main plane + scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; + scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes; + + // Increment num planes + svp_expanded_display_cfg->num_planes++; + + // Adjust the main plane settings + svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; + } + } +} + +static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_instance *core, const struct display_configuation_with_meta *display_cfg, + const struct dml2_display_cfg *svp_expanded_display_cfg, struct dml2_display_cfg_programming *programming, struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index, pipe_offset, stream_already_populated_mask, main_plane_index; + int total_pipe_regs_copied = 0; + int dml_internal_pipe_index = 0; + const struct dml2_plane_parameters *main_plane; + const struct dml2_plane_parameters *phantom_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + // Copy the unexpanded display config to output + memcpy(&programming->display_config, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + + // Set the global register values + dml2_core_calcs_get_arb_params(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.arb_regs); + // Get watermarks uses display config for ref clock override, so it doesn't matter whether we pass the pre or post expansion + // display config + dml2_core_calcs_get_watermarks(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.wm_regs[0]); + + // Check if FAMS2 is required + if (display_cfg->stage3.performed && display_cfg->stage3.success) { + programming->fams2_required = display_cfg->stage3.fams2_required; + } + + // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream) + for (stream_index = 0; stream_index < programming->display_config.num_streams; stream_index++) { + main_stream = &svp_expanded_display_cfg->stream_descriptors[stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[stream_index]]; + + // Set the descriptor + programming->stream_programming[stream_index].stream_descriptor = &programming->display_config.stream_descriptors[stream_index]; + + // Set the odm combine factor + programming->stream_programming[stream_index].num_odms_required = display_cfg->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used; + + // Check if the stream has implicit SVP enabled + if (main_stream != phantom_stream) { + // If so, copy the phantom stream descriptor + programming->stream_programming[stream_index].phantom_stream.enabled = true; + memcpy(&programming->stream_programming[stream_index].phantom_stream.descriptor, phantom_stream, sizeof(struct dml2_stream_parameters)); + } else { + programming->stream_programming[stream_index].phantom_stream.enabled = false; + } + + // Due to the way DML indexes data internally, it's easier to populate the rest of the display + // stream programming in the next stage + } + + dml_internal_pipe_index = 0; + total_pipe_regs_copied = 0; + stream_already_populated_mask = 0x0; + + // Loop 
over all main planes + for (plane_index = 0; plane_index < programming->display_config.num_planes; plane_index++) { + main_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index]; + + // Set the descriptor + programming->plane_programming[plane_index].plane_descriptor = &programming->display_config.plane_descriptors[plane_index]; + + // Set the mpc combine factor + programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; + + // Setup the appropriate p-state strategy + if (display_cfg->stage3.performed && display_cfg->stage3.success) { + switch (display_cfg->stage3.pstate_switch_modes[plane_index]) { + case dml2_uclk_pstate_support_method_vactive: + case dml2_uclk_pstate_support_method_vblank: + case dml2_uclk_pstate_support_method_fw_subvp_phantom: + case dml2_uclk_pstate_support_method_fw_drr: + case dml2_uclk_pstate_support_method_fw_vactive_drr: + case dml2_uclk_pstate_support_method_fw_vblank_drr: + case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: + programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index]; + break; + case dml2_uclk_pstate_support_method_reserved_hw: + case dml2_uclk_pstate_support_method_reserved_fw: + case dml2_uclk_pstate_support_method_reserved_fw_drr_fixed: + case dml2_uclk_pstate_support_method_reserved_fw_drr_var: + case dml2_uclk_pstate_support_method_not_supported: + case dml2_uclk_pstate_support_method_count: + default: + programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; + break; + } + } else { + programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; + } + + dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); + + for (pipe_offset = 0; pipe_offset < programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) { + // Assign storage for this pipe's register values + programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied]; + memset(programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); + total_pipe_regs_copied++; + + // Populate the main plane regs + dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index); + + // Multiple planes can refer to the same stream index, so it's only necessary to populate it once + if (!(stream_already_populated_mask & (0x1 << main_plane->stream_index))) { + dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index], dml_internal_pipe_index); + + programming->stream_programming[main_plane->stream_index].uclk_pstate_method = programming->plane_programming[plane_index].uclk_pstate_support_method; + + // If FAMS2 is required, populate stream params + if (programming->fams2_required) { + dml2_core_calcs_get_stream_fams2_programming(&core->clean_me_up.mode_lib, + display_cfg, + &programming->stream_programming[main_plane->stream_index].fams2_params, + programming->stream_programming[main_plane->stream_index].uclk_pstate_method, + plane_index); + } + + stream_already_populated_mask |= (0x1 << main_plane->stream_index); + } + 
dml_internal_pipe_index++; + } + } + + for (plane_index = programming->display_config.num_planes; plane_index < svp_expanded_display_cfg->num_planes; plane_index++) { + phantom_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index]; + main_plane_index = scratch->phantom_plane_index_to_main_plane_index[plane_index]; + main_plane = &svp_expanded_display_cfg->plane_descriptors[main_plane_index]; + + programming->plane_programming[main_plane_index].phantom_plane.valid = true; + memcpy(&programming->plane_programming[main_plane_index].phantom_plane.descriptor, phantom_plane, sizeof(struct dml2_plane_parameters)); + + dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[main_plane_index].svp_size_mall_bytes, dml_internal_pipe_index); + for (pipe_offset = 0; pipe_offset < programming->plane_programming[main_plane_index].num_dpps_required; pipe_offset++) { + // Assign storage for this pipe's register values + programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied]; + memset(programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); + total_pipe_regs_copied++; + + // Populate the phantom plane regs + dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], dml_internal_pipe_index); + // Populate the phantom stream specific programming + if (!(stream_already_populated_mask & (0x1 << phantom_plane->stream_index))) { + dml2_core_calcs_get_global_sync_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index].phantom_stream.global_sync, dml_internal_pipe_index); + + stream_already_populated_mask |= (0x1 << phantom_plane->stream_index); + } + + dml_internal_pipe_index++; + } + } +} + +bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out) +{ + struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance; + struct dml2_core_mode_support_locals *l = &core->scratch.mode_support_locals; + + bool result; + unsigned int i, stream_index, stream_bitmask; + int unsigned odm_count, dpp_count; + + expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch); + + l->mode_support_ex_params.mode_lib = &core->clean_me_up.mode_lib; + l->mode_support_ex_params.in_display_cfg = &l->svp_expanded_display_cfg; + l->mode_support_ex_params.min_clk_table = in_out->min_clk_table; + l->mode_support_ex_params.min_clk_index = in_out->min_clk_index; + l->mode_support_ex_params.out_evaluation_info = &in_out->mode_support_result.cfg_support_info.clean_me_up.support_info; + + result = dml2_core_calcs_mode_support_ex(&l->mode_support_ex_params); + + in_out->mode_support_result.cfg_support_info.is_supported = result; + + if (result) { + in_out->mode_support_result.global.dispclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDISPCLK * 1000); + in_out->mode_support_result.global.dcfclk_deepsleep_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.dcfclk_deepsleep * 1000); + in_out->mode_support_result.global.socclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.SOCCLK * 1000); + + in_out->mode_support_result.global.fclk_pstate_supported = l->mode_support_ex_params.out_evaluation_info->global_fclk_change_supported; + in_out->mode_support_result.global.uclk_pstate_supported = 
l->mode_support_ex_params.out_evaluation_info->global_dram_clock_change_supported; + + in_out->mode_support_result.global.active.fclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.FabricClock * 1000); + in_out->mode_support_result.global.active.dcfclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.DCFCLK * 1000); + + + in_out->mode_support_result.global.svp_prefetch.fclk_khz = (unsigned long)core->clean_me_up.mode_lib.ms.FabricClock * 1000; + in_out->mode_support_result.global.svp_prefetch.dcfclk_khz = (unsigned long)core->clean_me_up.mode_lib.ms.DCFCLK * 1000; + + in_out->mode_support_result.global.active.average_bw_sdp_kbps = 0; + in_out->mode_support_result.global.active.urgent_bw_dram_kbps = 0; + in_out->mode_support_result.global.svp_prefetch.average_bw_sdp_kbps = 0; + in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = 0; + + in_out->mode_support_result.global.active.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0); + in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0); + + in_out->mode_support_result.global.active.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0); + in_out->mode_support_result.global.active.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); + in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); + dml2_printf("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_sdp_kbps); + dml2_printf("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps); + dml2_printf("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps); + dml2_printf("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, 
in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps); + + for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) { + in_out->mode_support_result.per_plane[i].dppclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDPPCLK[i] * 1000); + } + + stream_bitmask = 0; + for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) { + switch (l->mode_support_ex_params.out_evaluation_info->ODMMode[i]) { + case dml2_odm_mode_bypass: + odm_count = 1; + dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i]; + break; + case dml2_odm_mode_combine_2to1: + odm_count = 2; + dpp_count = 2; + break; + case dml2_odm_mode_combine_3to1: + odm_count = 3; + dpp_count = 3; + break; + case dml2_odm_mode_combine_4to1: + odm_count = 4; + dpp_count = 4; + break; + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + case dml2_odm_mode_auto: + default: + odm_count = 1; + dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i]; + break; + } + + in_out->mode_support_result.cfg_support_info.plane_support_info[i].dpps_used = dpp_count; + + dml2_core_calcs_get_plane_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.plane_support_info[i], i); + + stream_index = l->svp_expanded_display_cfg.plane_descriptors[i].stream_index; + + in_out->mode_support_result.per_stream[stream_index].dscclk_khz = (unsigned int)core->clean_me_up.mode_lib.ms.required_dscclk_freq_mhz[i] * 1000; + dml2_printf("CORE_DCN4::%s: i=%d stream_index=%d, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz); + + if (!((stream_bitmask >> stream_index) & 0x1)) { + in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used = odm_count; + in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].dsc_enable = l->mode_support_ex_params.out_evaluation_info->DSCEnabled[i]; + in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].num_dsc_slices = l->mode_support_ex_params.out_evaluation_info->NumberOfDSCSlices[i]; + dml2_core_calcs_get_stream_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index], i); + in_out->mode_support_result.per_stream[stream_index].dtbclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDTBCLK[i] * 1000); + stream_bitmask |= 0x1 << stream_index; + } + } + } + + return result; +} + +static int lookup_uclk_dpm_index_by_freq(unsigned long uclk_freq_khz, struct dml2_soc_bb *soc_bb) +{ + int i; + + for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) { + if (uclk_freq_khz == soc_bb->clk_table.uclk.clk_values_khz[i]) + return i; + } + return 0; +} + +bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out) +{ + struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance; + struct dml2_core_mode_programming_locals *l = &core->scratch.mode_programming_locals; + + bool result = false; + unsigned int pipe_offset; + int dml_internal_pipe_index; + int total_pipe_regs_copied = 0; + int stream_already_populated_mask = 0; + + int main_stream_index; + unsigned int plane_index; + + expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch); + + l->mode_programming_ex_params.mode_lib = &core->clean_me_up.mode_lib; 
+
+ l->mode_programming_ex_params.in_display_cfg = &l->svp_expanded_display_cfg;
+ l->mode_programming_ex_params.min_clk_table = in_out->instance->minimum_clock_table;
+ l->mode_programming_ex_params.cfg_support_info = in_out->cfg_support_info;
+ l->mode_programming_ex_params.programming = in_out->programming;
+ l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4.active.uclk_khz,
+ &core->clean_me_up.mode_lib.soc);
+
+ result = dml2_core_calcs_mode_programming_ex(&l->mode_programming_ex_params);
+
+ if (result) {
+ // If the input display configuration contains implicit SVP, we need to use a special packer
+ if (in_out->display_cfg->display_config.overrides.enable_subvp_implicit_pmo) {
+ pack_mode_programming_params_with_implicit_subvp(core, in_out->display_cfg, &l->svp_expanded_display_cfg, in_out->programming, &core->scratch);
+ } else {
+ memcpy(&in_out->programming->display_config, in_out->display_cfg, sizeof(struct dml2_display_cfg));
+
+ dml2_core_calcs_get_arb_params(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.arb_regs);
+ dml2_core_calcs_get_watermarks(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.wm_regs[0]);
+
+ dml_internal_pipe_index = 0;
+
+ for (plane_index = 0; plane_index < in_out->programming->display_config.num_planes; plane_index++) {
+ in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index];
+
+ if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe)
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom;
+ else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe)
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom;
+ else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom;
+ else {
+ if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us)
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive;
+ else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us)
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank;
+ else
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported;
+ }
+
+ dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index);
+
+ for (pipe_offset = 0; pipe_offset < in_out->programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) {
+ in_out->programming->plane_programming[plane_index].plane_descriptor = &in_out->programming->display_config.plane_descriptors[plane_index];
+
+ // Assign
storage for this pipe's register values + in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &in_out->programming->pipe_regs[total_pipe_regs_copied]; + memset(in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set)); + total_pipe_regs_copied++; + + // Populate + dml2_core_calcs_get_pipe_regs(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index); + + main_stream_index = in_out->programming->display_config.plane_descriptors[plane_index].stream_index; + + // Multiple planes can refer to the same stream index, so it's only necessary to populate it once + if (!(stream_already_populated_mask & (0x1 << main_stream_index))) { + in_out->programming->stream_programming[main_stream_index].stream_descriptor = &in_out->programming->display_config.stream_descriptors[main_stream_index]; + in_out->programming->stream_programming[main_stream_index].num_odms_required = in_out->cfg_support_info->stream_support_info[main_stream_index].odms_used; + dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &in_out->programming->stream_programming[main_stream_index], dml_internal_pipe_index); + + stream_already_populated_mask |= (0x1 << main_stream_index); + } + dml_internal_pipe_index++; + } + } + } + } + + return result; +} + +bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out) +{ + struct dml2_core_internal_display_mode_lib *mode_lib = &in_out->instance->clean_me_up.mode_lib; + + if (in_out->mode_is_supported) + in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_programming_locals.mode_programming_ex_params.min_clk_index; + else + in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_support_locals.mode_support_ex_params.min_clk_index; + + dml2_core_calcs_get_informative(mode_lib, in_out->programming); + return true; +} + +bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out) +{ + memset(in_out->mcache_allocation, 0, sizeof(struct dml2_mcache_surface_allocation)); + + dml2_core_calcs_get_mcache_allocation(&in_out->instance->clean_me_up.mode_lib, in_out->mcache_allocation, in_out->plane_index); + + if (in_out->mcache_allocation->num_mcaches_plane0 > 0) + in_out->mcache_allocation->mcache_x_offsets_plane0[in_out->mcache_allocation->num_mcaches_plane0 - 1] = in_out->plane_descriptor->surface.plane0.width; + + if (in_out->mcache_allocation->num_mcaches_plane1 > 0) + in_out->mcache_allocation->mcache_x_offsets_plane1[in_out->mcache_allocation->num_mcaches_plane1 - 1] = in_out->plane_descriptor->surface.plane1.width; + + in_out->mcache_allocation->requires_dedicated_mall_mcache = false; + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h new file mode 100644 index 000000000000..235280c6dcf5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ + +#ifndef __DML2_CORE_DCN4_H__ +#define __DML2_CORE_DCN4_H__ +bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out); +bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out); +bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out); +bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out); +bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out); + +bool core_dcn4_unit_test(void); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c new file mode 100644 index 000000000000..846b0ae48596 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -0,0 +1,12269 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_internal_shared_types.h" +#include "dml2_core_dcn4_calcs.h" +#include "dml2_debug.h" +#include "lib_float_math.h" +#include "dml_top_types.h" +#include "dml2_core_shared.h" + +#define DML_VM_PTE_ADL_PATCH_EN +//#define DML_TVM_UPDATE_EN +#define DML_TDLUT_ROW_BYTES_FIX_EN +#define DML_REG_LIMIT_CLAMP_EN +#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 + +static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +{ + dml2_printf("DML: ===================================== \n"); + dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = 0x%x\n", support->ImmediateFlipSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = 0x%x\n", support->WritebackLatencySupport); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: ScaleRatioAndTapsSupport = 0x%x\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + dml2_printf("DML: support: SourceFormatPixelAndScanSupport = 0x%x\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = 0x%x\n", support->P2IWith420); + if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1) + dml2_printf("DML: support: DSCOnlyIfNecessaryWithBPP = 0x%x\n", support->DSCOnlyIfNecessaryWithBPP); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml2_printf("DML: support: DSC422NativeNotSupported = 0x%x\n", support->DSC422NativeNotSupported); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = 0x%x\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) + dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = 0x%x\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + dml2_printf("DML: support: BPPForMultistreamNotIndicated = 0x%x\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + dml2_printf("DML: support: MultistreamWithHDMIOreDP = 0x%x\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = 0x%x\n", support->MSOOrODMSplitWithNonDPLink); + if 
(!fail_only || support->NotEnoughLanesForMSO == 1) + dml2_printf("DML: support: NotEnoughLanesForMSO = 0x%x\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = 0x%x\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = 0x%x\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = 0x%x\n", support->NumberOfDP2p0Support); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = 0x%x\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->CursorSupport == 0) + dml2_printf("DML: support: CursorSupport = 0x%x\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = 0x%x\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = 0x%x\n", support->ViewportExceedsSurface); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = 0x%x\n", support->ExceededMALLSize); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = 0x%x\n", support->EnoughWritebackUnits); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = 0x%x\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = 0x%x\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = 0x%x\n", support->InvalidCombinationOfMALLUseForPState); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = 0x%x\n", support->ExceededMultistreamSlots); + if (!fail_only || support->NotEnoughDSCUnits == 1) + dml2_printf("DML: support: NotEnoughDSCUnits = 0x%x\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + dml2_printf("DML: support: NotEnoughDSCSlices = 0x%x\n", support->NotEnoughDSCSlices); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = 0x%x\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = 0x%x\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = 0x%x\n", support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = 0x%x\n", support->LinkCapacitySupport); + if (!fail_only || support->ROBSupport == 0) + dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + if (!fail_only || support->ROBUrgencyAvoidance == 0) + dml2_printf("DML: support: ROBUrgencyAvoidance = %d\n", 
support->ROBUrgencyAvoidance); + if (!fail_only || support->OutstandingRequestsSupport == 0) + dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) + dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + if (!fail_only || support->PTEBufferSizeNotExceeded == 0) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->PrefetchSupported == 0) + dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->DynamicMetadataSupported == 0) + dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->ModeSupport == 0) + dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); + if (!fail_only || support->ViewportSizeSupport == 0) + dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + dml2_printf("DML: ===================================== \n"); +} + +static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) +{ + for (unsigned int k = 0; k < display_cfg->num_planes; k++) { + double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { + switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { + case dml2_444: + out_bpp[k] = bpc * 3; + break; + case dml2_s422: + out_bpp[k] = bpc * 2; + break; + case dml2_n422: + out_bpp[k] = bpc * 2; + break; + case dml2_420: + default: + out_bpp[k] = bpc * 1.5; + break; + } + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { + out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; + } else { + out_bpp[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); +#endif + } +} + +static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up) +{ + unsigned int remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + 
if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) +{ + unsigned int num_active_pipes = 0; + + for (unsigned int k = 0; k < num_planes; k++) { + num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); +#endif + return num_active_pipes; +} + +static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) +{ + unsigned int pipe_idx = 0; + + for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { + pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; + } + + for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { + for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + +static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) +{ + bool is_phantom = false; + + if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || + plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { + is_phantom = true; + } + + return is_phantom; +} + +static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) +{ + unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; + + bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]); + dml2_printf("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); + return is_phantom; +} + +#define dml_get_per_pipe_var_func(variable, type, interval_var) static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) \ +{ \ +unsigned int plane_idx; \ +plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; \ +return (type) interval_var[plane_idx]; \ +} + +dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes); +dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes); +dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY); +dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC); +dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear); +dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma); + +dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup); +dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix); +dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix); +dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix); +dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY); +dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC); +dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte); 
+dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL); + +#define dml_get_per_plane_var_func(variable, type, interval_var) static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \ +{ \ +return (type) interval_var[plane_idx]; \ +} + +dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, mode_lib->ms.num_mcaches_l); +dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l); +dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l); +dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c); +dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c); +dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c); +dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l); +dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c); +dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache); +dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL); +dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines); + +#define dml_get_per_plane_array_var_func(variable, type, interval_var) static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \ +{ \ +return (type) interval_var[plane_idx][array_idx]; \ +} + +dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l); +dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c); + +#define dml_get_var_func(var, type, internal_var) static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \ +{ \ +return (type) internal_var; \ +} + +dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark); +dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark); +dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark); +dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark); +dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark); +dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency); +dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory); + +dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark); +dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark); +dml_get_var_func(wm_g6_temp_read, double, mode_lib->mp.Watermark.g6_temp_read_watermark_us); +dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark); +dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth); +dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip); +dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL); +dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency); 
+dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); +dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark); +dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency); +dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank); +dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame); +dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency); +dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame); +dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod); +dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase); +dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase); +dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase); +dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported); +dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0); + +dml_get_var_func(sys_active_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); + +dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(sys_active_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); + +dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(sys_active_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); + +dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, 
mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]); + +dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us); +dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us); +dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us); +dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us); + +dml_get_var_func(sys_active_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(sys_active_non_urg_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_non_urg_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(sys_active_non_urg_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); +dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); +dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + +dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte); + +dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled); +dml_get_var_func(wm_writeback_urgent, double, 
mode_lib->mp.Watermark.WritebackUrgentWatermark); + +dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); +dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b); +dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5); +dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis); + +static void CalculateMaxDETAndMinCompressedBufferSize( + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int ConfigReturnBufferSegmentSizeInKByte, + unsigned int ROBBufferSizeInKByte, + unsigned int MaxNumDPP, + unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size + unsigned int nomDETInKByteOverrideValue, // VBA_DELTA + bool is_mrq_present, + + // Output + unsigned int *MaxTotalDETInKByte, + unsigned int *nomDETInKByte, + unsigned int *MinCompressedBufferSizeInKByte) +{ + if (is_mrq_present) + *MaxTotalDETInKByte = (unsigned int) math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte)*4/5, 64); + else + *MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte; + + *nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); + *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; + +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present); + dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + dml2_printf("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); + dml2_printf("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); + dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); + dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); + dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); +#endif + + if (nomDETInKByteOverrideEnable) { + *nomDETInKByte = nomDETInKByteOverrideValue; + dml2_printf("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); + } +} + +static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd) +{ + //unsigned int num_active_planes = display_cfg->num_planes; + + //Progressive To Interlace Unit Effect + for (unsigned int k = 0; k < display_cfg->num_planes; ++k) { + PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) { + // FIXME_STAGE2... 
can sw pass the pixel rate for interlaced directly + //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz; + } + } +} + +static bool dml_is_420(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 1; + break; + case dml2_420_10: + val = 1; + break; + case dml2_420_12: + val = 1; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + default: + DML2_ASSERT(0); + break; + } + return val; +} + +static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) +{ + switch (sw_mode) { + case (dml2_sw_linear): + return 256; break; + case (dml2_sw_256b_2d): + return 256; break; + case (dml2_sw_4kb_2d): + return 4096; break; + case (dml2_sw_64kb_2d): + return 65536; break; + case (dml2_sw_256kb_2d): + return 262144; break; + case (dml2_gfx11_sw_linear): + return 256; break; + case (dml2_gfx11_sw_64kb_d): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_t): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_x): + return 65536; break; + case (dml2_gfx11_sw_64kb_r_x): + return 65536; break; + case (dml2_gfx11_sw_256kb_d_x): + return 262144; break; + case (dml2_gfx11_sw_256kb_r_x): + return 262144; break; + default: + DML2_ASSERT(0); + return 256; + }; +} + +static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan) +{ + bool is_vert = false; + if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} + +static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode) +{ + int unsigned version = 0; + + if (sw_mode == dml2_sw_linear || + sw_mode == dml2_sw_256b_2d || + sw_mode == dml2_sw_4kb_2d || + sw_mode == dml2_sw_64kb_2d || + sw_mode == dml2_sw_256kb_2d) { + version = 12; + } else if (sw_mode == dml2_gfx11_sw_linear || + sw_mode == dml2_gfx11_sw_64kb_d || + sw_mode == dml2_gfx11_sw_64kb_d_t || + sw_mode == dml2_gfx11_sw_64kb_d_x || + sw_mode == dml2_gfx11_sw_64kb_r_x || + sw_mode == dml2_gfx11_sw_256kb_d_x || + sw_mode == dml2_gfx11_sw_256kb_r_x) { + version = 11; + } else { + dml2_printf("ERROR: Invalid sw_mode setting! 
val=%u\n", sw_mode); + DML2_ASSERT(0); + } + + return version; +} + +static void CalculateBytePerPixelAndBlockSizes( + enum dml2_source_format_class SourcePixelFormat, + enum dml2_swizzle_mode SurfaceTiling, + unsigned int pitch_y, + unsigned int pitch_c, + + // Output + unsigned int *BytePerPixelY, + unsigned int *BytePerPixelC, + double *BytePerPixelDETY, + double *BytePerPixelDETC, + unsigned int *BlockHeight256BytesY, + unsigned int *BlockHeight256BytesC, + unsigned int *BlockWidth256BytesY, + unsigned int *BlockWidth256BytesC, + unsigned int *MacroTileHeightY, + unsigned int *MacroTileHeightC, + unsigned int *MacroTileWidthY, + unsigned int *MacroTileWidthC, + bool *surf_linear128_l, + bool *surf_linear128_c) +{ + *BytePerPixelDETY = 0; + *BytePerPixelDETC = 0; + *BytePerPixelY = 0; + *BytePerPixelC = 0; + + if (SourcePixelFormat == dml2_444_64) { + *BytePerPixelDETY = 8; + *BytePerPixelDETC = 0; + *BytePerPixelY = 8; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 0; + *BytePerPixelY = 4; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 0; + *BytePerPixelY = 2; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 0; + *BytePerPixelY = 1; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_rgbe_alpha) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 1; + *BytePerPixelY = 4; + *BytePerPixelC = 1; + } else if (SourcePixelFormat == dml2_420_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 2; + *BytePerPixelY = 1; + *BytePerPixelC = 2; + } else if (SourcePixelFormat == dml2_420_12) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 4; + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } else if (SourcePixelFormat == dml2_420_10) { + *BytePerPixelDETY = (double)(4.0 / 3); + *BytePerPixelDETC = (double)(8.0 / 3); + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } else { + dml2_printf("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat); + DML2_ASSERT(0); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); + dml2_printf("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); + dml2_printf("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); + dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); + dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); + dml2_printf("DML::%s: pitch_y = %u\n", __func__, pitch_y); + dml2_printf("DML::%s: pitch_c = %u\n", __func__, pitch_c); + dml2_printf("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l); + dml2_printf("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c); +#endif + + if (dml_get_gfx_version(SurfaceTiling) == 11) { + *surf_linear128_l = 0; + *surf_linear128_c = 0; + } else { + if (SurfaceTiling == dml2_sw_linear) { + *surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0); + + if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) + *surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0); + } + } + + if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) { + if (SurfaceTiling == dml2_sw_linear) { + *BlockHeight256BytesY = 1; + } else if (SourcePixelFormat == dml2_444_64) { + *BlockHeight256BytesY = 4; + } 
else if (SourcePixelFormat == dml2_444_8) { + *BlockHeight256BytesY = 16; + } else { + *BlockHeight256BytesY = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockHeight256BytesC = 0; + *BlockWidth256BytesC = 0; + } else { // dual plane + if (SurfaceTiling == dml2_sw_linear) { + *BlockHeight256BytesY = 1; + *BlockHeight256BytesC = 1; + } else if (SourcePixelFormat == dml2_rgbe_alpha) { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 16; + } else if (SourcePixelFormat == dml2_420_8) { + *BlockHeight256BytesY = 16; + *BlockHeight256BytesC = 8; + } else { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); + dml2_printf("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); + dml2_printf("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); + dml2_printf("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); +#endif + + if (dml_get_gfx_version(SurfaceTiling) == 11) { + if (SurfaceTiling == dml2_gfx11_sw_linear) { + *MacroTileHeightY = *BlockHeight256BytesY; + *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; + } + } else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) { + *MacroTileHeightY = 16 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 16 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; + } + } else { + *MacroTileHeightY = 32 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 32 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; + } + } + } else { + unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling); + unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling + + if (SurfaceTiling == dml2_sw_linear) { + macro_tile_scale = 1; + } else if (SurfaceTiling == dml2_sw_4kb_2d) { + macro_tile_scale = 4; + } else if (SurfaceTiling == dml2_sw_64kb_2d) { + macro_tile_scale = 16; + } else if (SurfaceTiling == dml2_sw_256kb_2d) { + macro_tile_scale = 32; + } else { + dml2_printf("ERROR: Invalid SurfaceTiling setting! 
val=%u\n", SurfaceTiling); + DML2_ASSERT(0); + } + + *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY; + *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); + dml2_printf("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); + dml2_printf("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); + dml2_printf("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); +#endif +} + +static void CalculateSinglePipeDPPCLKAndSCLThroughput( + double HRatio, + double HRatioChroma, + double VRatio, + double VRatioChroma, + double MaxDCHUBToPSCLThroughput, + double MaxPSCLToLBThroughput, + double PixelClock, + enum dml2_source_format_class SourcePixelFormat, + unsigned int HTaps, + unsigned int HTapsChroma, + unsigned int VTaps, + unsigned int VTapsChroma, + + // Output + double *PSCL_THROUGHPUT, + double *PSCL_THROUGHPUT_CHROMA, + double *DPPCLKUsingSingleDPP) +{ + double DPPCLKUsingSingleDPPLuma; + double DPPCLKUsingSingleDPPChroma; + + if (HRatio > 1) { + *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0)); + } else { + *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); + } + + DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1); + + if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) + DPPCLKUsingSingleDPPLuma = 2 * PixelClock; + + if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) { + *PSCL_THROUGHPUT_CHROMA = 0; + *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; + } else { + if (HRatioChroma > 1) { + *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0)); + } else { + *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); + } + DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma), + HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); + if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) + DPPCLKUsingSingleDPPChroma = 2 * PixelClock; + *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); + } +} + +static void CalculateSwathWidth( + const struct dml2_display_cfg *display_cfg, + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + enum dml2_odm_mode ODMMode[], + unsigned int BytePerPixY[], + unsigned int BytePerPixC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + bool surf_linear128_l[], + bool surf_linear128_c[], + unsigned int DPPPerSurface[], + + // Output + unsigned int req_per_swath_ub_l[], + unsigned int req_per_swath_ub_c[], + unsigned int SwathWidthSingleDPPY[], + unsigned int SwathWidthSingleDPPC[], + unsigned int SwathWidthY[], // per-pipe + unsigned int SwathWidthC[], // per-pipe + unsigned int MaximumSwathHeightY[], + unsigned int MaximumSwathHeightC[], + unsigned int swath_width_luma_ub[], // per-pipe + 
unsigned int swath_width_chroma_ub[]) // per-pipe +{ + enum dml2_odm_mode MainSurfaceODMMode; + double odm_hactive_factor = 1.0; + unsigned int req_width_horz_y; + unsigned int req_width_horz_c; + unsigned int surface_width_ub_l; + unsigned int surface_height_ub_l; + unsigned int surface_width_ub_c; + unsigned int surface_height_ub_c; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); +#endif + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + } else { + SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + dml2_printf("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); +#endif + + MainSurfaceODMMode = ODMMode[k]; + for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) { + if (display_cfg->plane_descriptors[k].stream_index == j) { + MainSurfaceODMMode = ODMMode[j]; + } + } + + if (ForceSingleDPP) { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + } else { + if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1) + odm_hactive_factor = 4.0; + else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1) + odm_hactive_factor = 3.0; + else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1) + odm_hactive_factor = 2.0; + + if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) { + SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio))); + } else if (DPPPerSurface[k] == 2) { + SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u HActive=%u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active); + dml2_printf("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + dml2_printf("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); + dml2_printf("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); + dml2_printf("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); +#endif + + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { + SwathWidthC[k] = SwathWidthY[k] / 2; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthC[k] = SwathWidthY[k]; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; + } + + if (ForceSingleDPP == true) { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + SwathWidthC[k] = SwathWidthSingleDPPC[k]; + } + + req_width_horz_y = Read256BytesBlockWidthY[k]; + req_width_horz_c = Read256BytesBlockWidthC[k]; 
+ + if (surf_linear128_l[k]) + req_width_horz_y = req_width_horz_y / 2; + + if (surf_linear128_c[k]) + req_width_horz_c = req_width_horz_c / 2; + + surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y); + surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]); + surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c); + surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); + dml2_printf("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); + dml2_printf("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); + dml2_printf("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); + dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + dml2_printf("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); + dml2_printf("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); + dml2_printf("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); + dml2_printf("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); + dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + dml2_printf("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary); + dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); +#endif + + req_per_swath_ub_l[k] = 0; + req_per_swath_ub_c[k] = 0; + if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y))); + } else { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y)); + } + req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y; + + if (BytePerPixC[k] > 0) { + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c))); + } else { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] 
- 1, req_width_horz_c) + req_width_horz_c)); + } + req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c; + } else { + swath_width_chroma_ub[k] = 0; + } + } else { + MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; + + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k]))); + } else { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k])); + } + req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k]; + if (BytePerPixC[k] > 0) { + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k]))); + } else { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k])); + } + req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k]; + } else { + swath_width_chroma_ub[k] = 0; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); + dml2_printf("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); + dml2_printf("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); + dml2_printf("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); + dml2_printf("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]); + dml2_printf("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]); +#endif + + } +} + +static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear) +{ + bool unb_req_ok = false; + bool unb_req_en = false; + + unb_req_ok = (TotalNumberOfActiveDPP == 1 && NoChromaOrLinear); + unb_req_en = unb_req_ok; + + if (unb_req_force_en) { + unb_req_en = unb_req_force_val && unb_req_ok; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en); + dml2_printf("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val); + dml2_printf("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok); + dml2_printf("DML::%s: unb_req_en = %u\n", __func__, unb_req_en); +#endif + return (unb_req_en); +} + +static void CalculateDETBufferSize( + struct dml2_core_shared_CalculateDETBufferSize_locals *l, + const struct dml2_display_cfg *display_cfg, + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + bool UnboundedRequestEnabled, + unsigned int nomDETInKByte, + unsigned int MaxTotalDETInKByte, + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int MinCompressedBufferSizeInKByte, + 
unsigned int ConfigReturnBufferSegmentSizeInkByte, + unsigned int CompressedBufferSegmentSizeInkByte, + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int full_swath_bytes_l[], + unsigned int full_swath_bytes_c[], + unsigned int DPPPerSurface[], + // Output + unsigned int DETBufferSizeInKByte[], + unsigned int *CompressedBufferSizeInkByte) +{ + memset(l, 0, sizeof(struct dml2_core_shared_CalculateDETBufferSize_locals)); + + bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES]; + bool NextPotentialSurfaceToAssignDETPieceFound; + bool MinimizeReallocationSuccess = false; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); + dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); + dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte); + dml2_printf("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte); +#endif + + // Note: Will use default det size if that fits 2 swaths + if (UnboundedRequestEnabled) { + if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) { + DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb; + } else { + DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)); + } + *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; + } else { + l->DETBufferSizePoolInKByte = MaxTotalDETInKByte; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + DETBufferSizeInKByte[k] = 0; + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { + l->max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte; + } else { + l->max_minDET = nomDETInKByte; + } + l->minDET = 128; + l->minDET_pipe = 0; + + // add DET resource until can hold 2 full swaths + while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) { + if (2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0 <= l->minDET) + l->minDET_pipe = l->minDET; + l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET); + dml2_printf("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET); + dml2_printf("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe); + dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]); + dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]); +#endif + + if (l->minDET_pipe == 0) { + l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte))); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe); +#endif + } + + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + DETBufferSizeInKByte[k] = 0; + } 
else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) { + DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb; + l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * display_cfg->plane_descriptors[k].overrides.det_size_override_kb; + } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe <= l->DETBufferSizePoolInKByte) { + DETBufferSizeInKByte[k] = l->minDET_pipe; + l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); + dml2_printf("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb); + dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + dml2_printf("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte); +#endif + } + + if (display_cfg->minimize_det_reallocation) { + MinimizeReallocationSuccess = true; + // To minimize det reallocation, we don't distribute based on each surface's bandwidth proportional to the global + // but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on + // bandwidth between the planes on the same stream. This ensures that large scale re-distribution only occurs on a + // stream count and/or pixel rate change, which is much less likely than general bandwidth changes per plane. + + // Calculate total pixel rate + for (unsigned int k = 0; k < display_cfg->num_streams; ++k) { + l->TotalPixelRate += display_cfg->stream_descriptors[k].timing.pixel_clock_khz; + } + + // Calculate per stream DET budget + for (unsigned int k = 0; k < display_cfg->num_streams; ++k) { + l->DETBudgetPerStream[k] = (unsigned int)((double) display_cfg->stream_descriptors[k].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate); + l->RemainingDETBudgetPerStream[k] = l->DETBudgetPerStream[k]; + } + + // Calculate the per stream total bandwidth + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + + // Check that the minimum can be satisfied by the budget + if (l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] >= DETBufferSizeInKByte[k]) { + l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= DETBufferSizeInKByte[k]; + } else { + MinimizeReallocationSuccess = false; + break; + } + } + } + + if (MinimizeReallocationSuccess) { + // Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each stream's + // budget proportionally across its planes + l->ResidualDETAfterRounding = MaxTotalDETInKByte; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index]) + * l->DETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]); + + if (l->IdealDETBudget > DETBufferSizeInKByte[k]) { + l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k]; + if (l->DeltaDETBudget >
l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]) + l->DeltaDETBudget = l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]; + + DETBufferSizeInKByte[k] += l->DeltaDETBudget; + l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= l->DeltaDETBudget; + } + + // Split among the pipes per the plane + DETBufferSizeInKByte[k] = (unsigned int)((double)DETBufferSizeInKByte[k] / (ForceSingleDPP ? 1 : DPPPerSurface[k])); + + // Round down to segment size + DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / CompressedBufferSegmentSizeInkByte) * CompressedBufferSegmentSizeInkByte; + + l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k]; + } + } + } + } + + if (!MinimizeReallocationSuccess) { + l->TotalBandwidth = 0; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: --- Before bandwidth adjustment ---\n", __func__); + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + } + dml2_printf("DML::%s: --- DET allocation with bandwidth ---\n", __func__); +#endif + dml2_printf("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth); + l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(ForceSingleDPP ? 
1 : DPPPerSurface[k]) * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; + } else { + DETPieceAssignedToThisSurfaceAlready[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); + dml2_printf("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece); +#endif + } + + for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) { + NextPotentialSurfaceToAssignDETPieceFound = false; + l->NextSurfaceToAssignDETPiece = 0; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); + dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); + dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + dml2_printf("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece); +#endif + if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound || + ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) { + l->NextSurfaceToAssignDETPiece = k; + NextPotentialSurfaceToAssignDETPieceFound = true; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); + dml2_printf("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); +#endif + } + + if (NextPotentialSurfaceToAssignDETPieceFound) { + l->NextDETBufferPieceInKByte = (unsigned int)(math_min2( + math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]) / l->BandwidthOfSurfacesNotAssignedDETPiece / + ((ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)) + * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte, + math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 
1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte); + dml2_printf("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece); + dml2_printf("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + dml2_printf("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + dml2_printf("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece); + dml2_printf("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte); + dml2_printf("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); +#endif + + DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); +#endif + + l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte; + DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true; + l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + } + } + } + *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; + } + *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: --- After bandwidth adjustment ---\n", __func__); + dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + } +#endif +} + +static double CalculateRequiredDispclk( + enum dml2_odm_mode ODMMode, + double PixelClock) +{ + + if (ODMMode == dml2_odm_mode_combine_4to1) { + return PixelClock / 4.0; + } else if (ODMMode == dml2_odm_mode_combine_3to1) { + return PixelClock / 3.0; + } else if (ODMMode == dml2_odm_mode_combine_2to1) { + return PixelClock / 2.0; + } else { + return PixelClock; + } +} + +static double TruncToValidBPP( + struct dml2_core_shared_TruncToValidBPP_locals *l, + double LinkBitRate, + unsigned int Lanes, + unsigned int HTotal, + unsigned int HActive, + double PixelClock, + double DesiredBPP, + bool DSCEnable, + enum dml2_output_encoder_class Output, + enum dml2_output_format_class Format, + unsigned int DSCInputBitPerComponent, + unsigned int DSCSlices, + unsigned int AudioRate, + unsigned int AudioLayout, + enum dml2_odm_mode ODMModeNoDSC, + enum dml2_odm_mode ODMModeDSC, + + // Output + unsigned int *RequiredSlots) +{ + double MaxLinkBPP; + unsigned int MinDSCBPP; + double MaxDSCBPP; + unsigned int NonDSCBPP0; + unsigned int NonDSCBPP1; + unsigned int NonDSCBPP2; + enum 
dml2_odm_mode ODMMode; + + if (Format == dml2_420) { + NonDSCBPP0 = 12; + NonDSCBPP1 = 15; + NonDSCBPP2 = 18; + MinDSCBPP = 6; + MaxDSCBPP = 16; + } else if (Format == dml2_444) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 30; + NonDSCBPP2 = 36; + MinDSCBPP = 8; + MaxDSCBPP = 16; + } else { + if (Output == dml2_hdmi || Output == dml2_hdmifrl) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 24; + NonDSCBPP2 = 24; + } else { + NonDSCBPP0 = 16; + NonDSCBPP1 = 20; + NonDSCBPP2 = 24; + } + if (Format == dml2_n422 || Output == dml2_hdmifrl) { + MinDSCBPP = 7; + MaxDSCBPP = 16; + } else { + MinDSCBPP = 8; + MaxDSCBPP = 16; + } + } + if (Output == dml2_dp2p0) { + MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0; + } else if (DSCEnable && Output == dml2_dp) { + MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100); + } else { + MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock; + } + + ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC; + + if (ODMMode == dml2_odm_mode_split_1to2) { + MaxLinkBPP = 2 * MaxLinkBPP; + } + + if (DesiredBPP == 0) { + if (DSCEnable) { + if (MaxLinkBPP < MinDSCBPP) { + return __DML2_CALCS_DPP_INVALID__; + } else if (MaxLinkBPP >= MaxDSCBPP) { + return MaxDSCBPP; + } else { + return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0; + } + } else { + if (MaxLinkBPP >= NonDSCBPP2) { + return NonDSCBPP2; + } else if (MaxLinkBPP >= NonDSCBPP1) { + return NonDSCBPP1; + } else if (MaxLinkBPP >= NonDSCBPP0) { + return NonDSCBPP0; + } else { + return __DML2_CALCS_DPP_INVALID__; + } + } + } else { + if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) || + (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { + return __DML2_CALCS_DPP_INVALID__; + } else { + return DesiredBPP; + } + } +} + +// updated for dcn4 +static unsigned int dscceComputeDelay( + unsigned int bpc, + double BPP, + unsigned int sliceWidth, + unsigned int numSlices, + enum dml2_output_format_class pixelFormat, + enum dml2_output_encoder_class Output) +{ + // valid bpc = source bits per component in the set of {8, 10, 12} + // valid bpp = increments of 1/16 of a bit + // min = 6/7/8 in N420/N422/444, respectively + // max = such that compression is 1:1 + //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) + //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} + + // fixed value + unsigned int rcModelSize = 8192; + + // N422/N420 operate at 2 pixels per clock + unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified; + + + if (pixelFormat == dml2_420) + pixelsPerClock = 2; + // #all other modes operate at 1 pixel per clock + else if (pixelFormat == dml2_444) + pixelsPerClock = 1; + else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl) + pixelsPerClock = 2; + else + pixelsPerClock = 1; + + //initial transmit delay as per PPS + initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock)); + + //slice width as seen 
by dscc_bcl in pixels or pixels pairs (depending on number of pixels per pixel container based on pixel format) + slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth; + + padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0; + + if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) { + if ((initial_xmit_delay + padding_pixels) % 3 == 1) { + initial_xmit_delay++; + } + } + + + //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard + if (bpc == 8) + ssm_group_priming_delay = 83; + else if (bpc == 10) + ssm_group_priming_delay = 91; + else if (bpc == 12) + ssm_group_priming_delay = 115; + else if (bpc == 14) + ssm_group_priming_delay = 123; + else + ssm_group_priming_delay = 128; + + //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice + slice_width_groups = (slice_width_modified + 2) / 3; + + //determine number of padded pixels in the last group of a slice line, computed as + slice_padded_pixels = 3 * slice_width_groups - slice_width_modified; + + + + + //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered + number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified; + + //increase initial transmit delay by the number of padded pixels added to a slice line multipled by the integer number of complete lines to reach initial transmit delay + //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay + ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd; + + //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels + ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3; + + //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay + groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay; + + + //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice + //needs to be rounded up as a complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice + lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next + + //determine if there are non-zero number of pixels reached in the group where initial transmit delay is reached + //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay + additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 
1 : 0; + + //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block) + ssm_pipeline_delay = 2; + + //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block) + obsm_pipeline_delay = 1; + + //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes + if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl) + cycles_per_group = 6; + else + cycles_per_group = 3; + //delay of the bit stream construction layer in pixels is the sum of: + //1. number of pixel containers in a slice line multiplied by the number of lines required to reach initial transmit delay multiplied by the number of slices to the left of the last horizontal slice + //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice) + //3. additional group of delay if initial transmit delay is reached exactly in a group + //4. ssm and obsm pipeline delay (i.e., clock cycles of delay) + group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay; + pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay; + + //pixel delay is group_delay (converted to pixels) + pipeline, however, the first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format) + pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: bpc: %u\n", __func__, bpc); + dml2_printf("DML::%s: BPP: %f\n", __func__, BPP); + dml2_printf("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); + dml2_printf("DML::%s: numSlices: %u\n", __func__, numSlices); + dml2_printf("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); + dml2_printf("DML::%s: Output: %u\n", __func__, Output); + dml2_printf("DML::%s: pixels: %u\n", __func__, pixels); +#endif + return pixels; +} + + +//updated in dcn4 +static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output) +{ + unsigned int Delay = 0; + unsigned int dispclk_per_dscclk = 3; + + // sfr + Delay = Delay + 2; + + if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) { + dispclk_per_dscclk = 3 * 2; + } + + if (pixelFormat == dml2_420) { + //dscc top delay for pixel compression layer + Delay = Delay + 16 * dispclk_per_dscclk; + + // dscc - input deserializer + Delay = Delay + 5; + + // dscc - input cdc fifo + Delay = Delay + 1 + 4 * dispclk_per_dscclk; + + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } else if (pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) { + //dscc top delay for pixel compression layer + Delay = Delay + 16 * dispclk_per_dscclk; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 1 + 4 * dispclk_per_dscclk; + + + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } else if (pixelFormat == dml2_s422) { + //dscc top delay for pixel compression layer + Delay = Delay + 17 * dispclk_per_dscclk; + + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo +
Delay = Delay + 1 + 4 * dispclk_per_dscclk; + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } else { + //dscc top delay for pixel compression layer + Delay = Delay + 16 * dispclk_per_dscclk; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 1 + 4 * dispclk_per_dscclk; + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } + + // sft + Delay = Delay + 1; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); + dml2_printf("DML::%s: Delay = %u\n", __func__, Delay); +#endif + + return Delay; +} + +static unsigned int CalculateHostVMDynamicLevels( + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMinPageSize, + unsigned int HostVMMaxNonCachedPageTableLevels) +{ + unsigned int HostVMDynamicLevels = 0; + + if (GPUVMEnable && HostVMEnable) { + if (HostVMMinPageSize < 2048) + HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; + else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) + HostVMDynamicLevels = (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - 1); + else + HostVMDynamicLevels = (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - 2); + } else { + HostVMDynamicLevels = 0; + } + return HostVMDynamicLevels; +} + +static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p) +{ + unsigned int extra_dpde_bytes; + unsigned int extra_mpde_bytes; + unsigned int MacroTileSizeBytes; + unsigned int vp_height_dpte_ub; + + unsigned int meta_surface_bytes; + unsigned int vm_bytes; + unsigned int vp_height_meta_ub; + unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this + + *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes; + *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes; + if (p->SurfaceTiling == dml2_sw_linear) { + *p->meta_row_height = 32; + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth)); + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways? 
+ } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + *p->meta_row_height = *p->MetaRequestHeight; + if (p->ViewportStationary && p->NumberOfDPPs == 1) { + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth)); + } else { + *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth); + } + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); + } else { + *p->meta_row_height = *p->MetaRequestWidth; + if (p->ViewportStationary && p->NumberOfDPPs == 1) { + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight)); + } else { + *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight); + } + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0); + } + + if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) { + vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes)); + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes); + } else { + vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes); + } + + meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch); + dml2_printf("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes); +#endif + if (p->GPUVMEnable == true) { + double meta_vmpg_bytes = 4.0 * 1024.0; + *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64); + extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1); + } else { + *p->meta_pte_bytes_per_frame_ub = 0; + extra_mpde_bytes = 0; + } + + if (!p->DCCEnable || !p->mrq_present) { + *p->meta_pte_bytes_per_frame_ub = 0; + extra_mpde_bytes = 0; + *p->meta_row_bytes = 0; + } + + if (!p->GPUVMEnable) { + *p->PixelPTEBytesPerRow = 0; + *p->PixelPTEBytesPerRowStorage = 0; + *p->dpte_row_width_ub = 0; + *p->dpte_row_height = 0; + *p->dpte_row_height_linear = 0; + *p->PixelPTEBytesPerRow_one_row_per_frame = 0; + *p->dpte_row_width_ub_one_row_per_frame = 0; + *p->dpte_row_height_one_row_per_frame = 0; + *p->vmpg_width = 0; + *p->vmpg_height = 0; + *p->PixelPTEReqWidth = 0; + *p->PixelPTEReqHeight = 0; + *p->PTERequestSize = 0; + *p->dpde0_bytes_per_frame_ub = 0; + return 0; + } + + MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight; + + if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) { + vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight)); + } else if 
(!dml_is_vertical_rotation(p->RotationAngle)) { + vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight); + } else { + vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight); + } + + if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) { + *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1)); + extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2); + } else { + *p->dpde0_bytes_per_frame_ub = 0; + extra_dpde_bytes = 0; + } + + vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable); + dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + dml2_printf("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear); + dml2_printf("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel); + dml2_printf("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels); + dml2_printf("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes); + dml2_printf("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes); + dml2_printf("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight); + dml2_printf("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth); + dml2_printf("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub); + dml2_printf("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub); + dml2_printf("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes); + dml2_printf("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes); + dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + dml2_printf("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight); + dml2_printf("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth); + dml2_printf("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); +#endif + + if (p->SurfaceTiling == dml2_sw_linear) { + *p->PixelPTEReqHeight = 1; + *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel; + PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel; + *p->PTERequestSize = 64; + + *p->vmpg_height = 1; + *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel; + } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE + *p->PixelPTEReqHeight = p->MacroTileHeight; + *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + *p->PTERequestSize = 64; + + *p->vmpg_height = p->MacroTileHeight; + *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + + } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile + // one 64KB tile, is 16x16x256B req + *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes; + *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes; + *p->PTERequestSize = 128; + + *p->vmpg_height = *p->PixelPTEReqHeight; + *p->vmpg_width = *p->PixelPTEReqWidth; + } else { + // default for rest of calculation to go through, when vm is disable, the calulated pte 
related values shouldnt be used anyways + *p->PixelPTEReqHeight = p->MacroTileHeight; + *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + *p->PTERequestSize = 64; + + *p->vmpg_height = p->MacroTileHeight; + *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + + if (p->GPUVMEnable == true) { + dml2_printf("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n", + __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling)); + DML2_ASSERT(0); + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + dml2_printf("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight); + dml2_printf("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth); + dml2_printf("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); + dml2_printf("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize); + dml2_printf("DML::%s: Pitch = %u\n", __func__, p->Pitch); + dml2_printf("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width); + dml2_printf("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height); +#endif + + *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub; + *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth); + *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize); + *p->dpte_row_height_linear = 0; + + if (p->SurfaceTiling == dml2_sw_linear) { + *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1)))); + *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth); + *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize); + + // VBA_DELTA, VBA doesn't have programming value for pte row height linear. 
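+	// Worked example with assumed values (4 KB pages, 4 bytes/pixel, PTEBufferSizeInRequests = 64, Pitch = 3840):
+	// PixelPTEReqWidth_linear = 4 * 1024 * 8 / 4 = 8192, so 64 * 8192 / 3840 is about 136; rounded down to a
+	// power of two and clamped by the code below, that yields dpte_row_height_linear = 128 scan lines per PTE row.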
+ *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1); + if (*p->dpte_row_height_linear > 128) + *p->dpte_row_height_linear = 128; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub); +#endif + + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + *p->dpte_row_height = *p->PixelPTEReqHeight; + + if (p->GPUVMMinPageSizeKBytes > 64) { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth); + } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) { + *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth)); + } else { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth); + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub); +#endif + + *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize; + } else { + *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth)); + + if (p->ViewportStationary && (p->NumberOfDPPs == 1)) { + *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight)); + } else { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight); + } + + *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub); +#endif + } + + if (p->GPUVMEnable != true) { + *p->PixelPTEBytesPerRow = 0; + *p->PixelPTEBytesPerRow_one_row_per_frame = 0; + } + + *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + dml2_printf("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height); + dml2_printf("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); + dml2_printf("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); + dml2_printf("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); + dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow); + dml2_printf("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); + dml2_printf("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); + dml2_printf("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); + dml2_printf("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); + dml2_printf("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame); +#endif + + 
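+	// The value returned below is the per-frame page-table fetch estimate accumulated above
+	// (meta PTEs + extra meta PDEs + dpde0 + extra data PDEs); the per-row dpte and meta sizes are
+	// reported separately through the PixelPTEBytesPerRow*/meta_row_bytes output pointers.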
return vm_bytes; +} // CalculateVMAndRowBytes + +static unsigned int CalculatePrefetchSourceLines( + double VRatio, + unsigned int VTaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + enum dml2_rotation_angle RotationAngle, + bool mirrored, + bool ViewportStationary, + unsigned int SwathWidth, + unsigned int ViewportHeight, + unsigned int ViewportXStart, + unsigned int ViewportYStart, + + // Output + unsigned int *VInitPreFill, + unsigned int *MaxNumSwath) +{ + + unsigned int vp_start_rot = 0; + unsigned int sw0_tmp = 0; + unsigned int MaxPartialSwath = 0; + double numLines = 0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); + dml2_printf("DML::%s: VTaps = %u\n", __func__, VTaps); + dml2_printf("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); + dml2_printf("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); + dml2_printf("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); + dml2_printf("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); +#endif + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1)); + else + *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1)); + + if (ViewportStationary) { + if (RotationAngle == dml2_rotation_180) { + vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); + } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) { + vp_start_rot = ViewportXStart; + } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) { + vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); + } else { + vp_start_rot = ViewportYStart; + } + sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); + if (sw0_tmp < *VInitPreFill) { + *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1); + } else { + *MaxNumSwath = 1; + } + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight)); + } else { + *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1); + if (*VInitPreFill > 1) { + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight)); + } else { + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight)); + } + } + numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); + dml2_printf("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); + dml2_printf("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); + dml2_printf("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); + dml2_printf("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); +#endif + return (unsigned int)(numLines); + +} + +static void CalculateRowBandwidth( + bool GPUVMEnable, + bool use_one_row_for_frame, + enum dml2_source_format_class SourcePixelFormat, + double VRatio, + double VRatioChroma, + bool DCCEnable, + double LineTime, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + + bool mrq_present, + unsigned int 
meta_row_bytes_per_row_ub_l, + unsigned int meta_row_bytes_per_row_ub_c, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + + // Output + double *dpte_row_bw, + double *meta_row_bw) +{ + if (!DCCEnable || !mrq_present) { + *meta_row_bw = 0; + } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { + *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime) + + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } +} + +static void CalculateMALLUseForStaticScreen( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + unsigned int SurfaceSizeInMALL[], + bool one_row_per_frame_fits_in_buffer[], + + // Output + bool is_using_mall_for_ss[]) +{ + + unsigned int SurfaceToAddToMALL; + bool CanAddAnotherSurfaceToMALL; + unsigned int TotalSurfaceSizeInMALL; + + TotalSurfaceSizeInMALL = 0; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable); + if (is_using_mall_for_ss[k]) + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); + dml2_printf("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); +#endif + } + + SurfaceToAddToMALL = 0; + CanAddAnotherSurfaceToMALL = true; + while (CanAddAnotherSurfaceToMALL) { + CanAddAnotherSurfaceToMALL = false; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 && + !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] && + (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { + CanAddAnotherSurfaceToMALL = true; + SurfaceToAddToMALL = k; + dml2_printf("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); + } + } + if (CanAddAnotherSurfaceToMALL) { + is_using_mall_for_ss[SurfaceToAddToMALL] = true; + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); + dml2_printf("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); +#endif + } + } +} + +static void CalculateDCCConfiguration( + bool DCCEnabled, + bool DCCProgrammingAssumesScanDirectionUnknown, + enum dml2_source_format_class SourcePixelFormat, + unsigned int SurfaceWidthLuma, + unsigned int SurfaceWidthChroma, + unsigned int SurfaceHeightLuma, + unsigned int SurfaceHeightChroma, + 
unsigned int nomDETInKByte, + unsigned int RequestHeight256ByteLuma, + unsigned int RequestHeight256ByteChroma, + enum dml2_swizzle_mode TilingFormat, + unsigned int BytePerPixelY, + unsigned int BytePerPixelC, + double BytePerPixelDETY, + double BytePerPixelDETC, + enum dml2_rotation_angle RotationAngle, + + // Output + enum dml2_core_internal_request_type *RequestLuma, + enum dml2_core_internal_request_type *RequestChroma, + unsigned int *MaxUncompressedBlockLuma, + unsigned int *MaxUncompressedBlockChroma, + unsigned int *MaxCompressedBlockLuma, + unsigned int *MaxCompressedBlockChroma, + unsigned int *IndependentBlockLuma, + unsigned int *IndependentBlockChroma) +{ + unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; + + unsigned int segment_order_horz_contiguous_luma; + unsigned int segment_order_horz_contiguous_chroma; + unsigned int segment_order_vert_contiguous_luma; + unsigned int segment_order_vert_contiguous_chroma; + + unsigned int req128_horz_wc_l; + unsigned int req128_horz_wc_c; + unsigned int req128_vert_wc_l; + unsigned int req128_vert_wc_c; + + unsigned int yuv420; + unsigned int horz_div_l; + unsigned int horz_div_c; + unsigned int vert_div_l; + unsigned int vert_div_c; + + unsigned int swath_buf_size; + double detile_buf_vp_horz_limit; + double detile_buf_vp_vert_limit; + + unsigned int MAS_vp_horz_limit; + unsigned int MAS_vp_vert_limit; + unsigned int max_vp_horz_width; + unsigned int max_vp_vert_height; + unsigned int eff_surf_width_l; + unsigned int eff_surf_width_c; + unsigned int eff_surf_height_l; + unsigned int eff_surf_height_c; + + unsigned int full_swath_bytes_horz_wc_l; + unsigned int full_swath_bytes_horz_wc_c; + unsigned int full_swath_bytes_vert_wc_l; + unsigned int full_swath_bytes_vert_wc_c; + + yuv420 = dml_is_420(SourcePixelFormat); + horz_div_l = 1; + horz_div_c = 1; + vert_div_l = 1; + vert_div_c = 1; + + if (BytePerPixelY == 1) + vert_div_l = 0; + if (BytePerPixelC == 1) + vert_div_c = 0; + + if (BytePerPixelC == 0) { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; + detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); + detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); + } else { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; + detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); + detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); + } + + if (SourcePixelFormat == dml2_420_10) { + detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; + detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; + } + + detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16); + detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16); + + MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144; + MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); + max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit)); + max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit)); + eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? 
max_vp_horz_width : SurfaceWidthLuma); + eff_surf_width_c = eff_surf_width_l / (1 + yuv420); + eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); + eff_surf_height_c = eff_surf_height_l / (1 + yuv420); + + full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; + full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; + if (BytePerPixelC > 0) { + full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; + full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; + } else { + full_swath_bytes_horz_wc_c = 0; + full_swath_bytes_vert_wc_c = 0; + } + + if (SourcePixelFormat == dml2_420_10) { + full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0)); + } + + if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 0; + } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 1; + } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 1; + req128_horz_wc_c = 0; + } else { + req128_horz_wc_l = 1; + req128_horz_wc_c = 1; + } + + if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 0; + } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 1; + } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 1; + req128_vert_wc_c = 0; + } else { + req128_vert_wc_l = 1; + req128_vert_wc_c = 1; + } + + if (BytePerPixelY == 2) { + segment_order_horz_contiguous_luma = 0; + segment_order_vert_contiguous_luma = 1; + } else { + segment_order_horz_contiguous_luma = 1; + segment_order_vert_contiguous_luma = 0; + } + + if (BytePerPixelC == 2) { + segment_order_horz_contiguous_chroma = 0; + segment_order_vert_contiguous_chroma = 1; + } else { + segment_order_horz_contiguous_chroma = 1; + segment_order_vert_contiguous_chroma = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); + dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + dml2_printf("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); + dml2_printf("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); + dml2_printf("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); + dml2_printf("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); + dml2_printf("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); + 
dml2_printf("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); + dml2_printf("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); +#endif + if (DCCProgrammingAssumesScanDirectionUnknown == true) { + if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } else if (!dml_is_vertical_rotation(RotationAngle)) { + if (req128_horz_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_horz_contiguous_luma == 0) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_horz_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_horz_contiguous_chroma == 0) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } else { + if (req128_vert_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_vert_contiguous_luma == 0) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_vert_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_vert_contiguous_chroma == 0) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } + + if (*RequestLuma == dml2_core_internal_request_type_256_bytes) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 256; + *IndependentBlockLuma = 0; + } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 128; + *IndependentBlockLuma = 128; + } else { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 64; + *IndependentBlockLuma = 64; + } + + if (*RequestChroma == dml2_core_internal_request_type_256_bytes) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 256; + *IndependentBlockChroma = 0; + } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 128; + *IndependentBlockChroma = 128; + } else { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 64; + *IndependentBlockChroma = 64; + } + + if (DCCEnabled != true || BytePerPixelC == 0) { + *MaxUncompressedBlockChroma = 0; + *MaxCompressedBlockChroma = 0; 
+ *IndependentBlockChroma = 0; + } + + if (DCCEnabled != true) { + *MaxUncompressedBlockLuma = 0; + *MaxCompressedBlockLuma = 0; + *IndependentBlockLuma = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); + dml2_printf("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); + dml2_printf("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); + dml2_printf("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); + dml2_printf("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); + dml2_printf("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); +#endif + +} + +static void calculate_mcache_row_bytes( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_calculate_mcache_row_bytes_params *p) +{ + unsigned int vmpg_bytes = 0; + unsigned int blk_bytes = 0; + float meta_per_mvmpg_per_channel = 0; + unsigned int est_blk_per_vmpg = 2; + unsigned int mvmpg_per_row_ub = 0; + unsigned int full_vp_width_mvmpg_aligned = 0; + unsigned int full_vp_height_mvmpg_aligned = 0; + unsigned int meta_per_mvmpg_per_channel_ub = 0; + unsigned int mvmpg_per_mcache; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: num_chans = %u\n", __func__, p->num_chans); + dml2_printf("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes); + dml2_printf("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes); + dml2_printf("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes); + dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); + dml2_printf("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes); + dml2_printf("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary); + dml2_printf("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode); + dml2_printf("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x); + dml2_printf("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y); + dml2_printf("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width); + dml2_printf("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height); + dml2_printf("DML::%s: blk_width = %u\n", __func__, p->blk_width); + dml2_printf("DML::%s: blk_height = %u\n", __func__, p->blk_height); + dml2_printf("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width); + dml2_printf("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height); + dml2_printf("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes); +#endif + DML2_ASSERT(p->mcache_line_size_bytes != 0); + DML2_ASSERT(p->mcache_size_bytes != 0); + + *p->mvmpg_width = 0; + *p->mvmpg_height = 0; + + if (p->full_vp_height == 0 && p->full_vp_width == 0) { + *p->num_mcaches = 0; + *p->mcache_row_bytes = 0; + } else { + blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode); + + // if gpuvm is not enable, the alignment boundary should be in terms of tiling block size + vmpg_bytes = p->gpuvm_page_size_kbytes * 1024; + + //With vmpg_bytes >= tile blk_bytes, the meta_row_width alignment equations are relative to the vmpg_width/height. + // But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block. + // Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end. 
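+	// e.g. a 4 KB vmpg inside a 64 KB swizzle block means 65536 / 4096 = 16 pages share one tile block,
+	// which is the factor of 16 applied to meta_per_mvmpg_per_channel_ub further down.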
+ *p->mvmpg_width = p->blk_width; + *p->mvmpg_height = p->blk_height; + if (p->gpuvm_enable) { + if (vmpg_bytes >= blk_bytes) { + *p->mvmpg_width = p->vmpg_width; + *p->mvmpg_height = p->vmpg_height; + } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) { + dml2_printf("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__); + DML2_ASSERT(0); + } + } + + //For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c + full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width)); + full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height)); + + *p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned; + + //Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes. + if (!p->surf_vert) { //horizontal access + if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes) + *p->meta_row_width_ub = full_vp_width_mvmpg_aligned; + else + *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width; + mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width; + } else { //vertical access + if (p->vp_stationary == 1) + *p->meta_row_width_ub = full_vp_height_mvmpg_aligned; + else + *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height; + mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height; + } + + if (p->gpuvm_enable) { + meta_per_mvmpg_per_channel = (float)vmpg_bytes / 256 / p->num_chans; + + //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic + if (p->surf_vert && vmpg_bytes > blk_bytes) { + meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / 256 / p->num_chans; + } + + *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom + } else { + meta_per_mvmpg_per_channel = (float) blk_bytes / 256 / p->num_chans; + + if (!p->surf_vert) + *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; + else + *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); + } + + meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes); + + //but for 4KB vmpg with 64KB tile blk + if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096)) + meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub; + + // If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes, + // then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes. 
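+	// Rough example with assumed numbers: mvmpg_per_row_ub = 60 and meta_per_mvmpg_per_channel_ub = 128
+	// give mcache_row_bytes = 60 * 128 = 7680; with mcache_size_bytes = 4096 the code below then needs
+	// num_mcaches = ceil(7680 / 4096) = 2 for this request stream.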
+ if (p->gpuvm_enable || !p->surf_vert) { + *p->mcache_row_bytes = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub; + } else { // horizontal and gpuvm disable + *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256; + *p->mcache_row_bytes = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes); + } + + *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref + *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->mcache_size_bytes, 1); + + mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub; + *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); + dml2_printf("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes); + dml2_printf("DML::%s: blk_bytes = %u\n", __func__, blk_bytes); + dml2_printf("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel); + dml2_printf("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub); + dml2_printf("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub); + dml2_printf("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width); + dml2_printf("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height); + dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor); + dml2_printf("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor); +#endif + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes); + dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); +#endif + DML2_ASSERT(*p->num_mcaches > 0); +} + +static void calculate_mcache_setting( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_calculate_mcache_setting_params *p) +{ + unsigned int n; + + struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals; + memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals)); + + *p->num_mcaches_l = 0; + *p->mcache_row_bytes_l = 0; + *p->dcc_dram_bw_nom_overhead_factor_l = 1.0; + *p->dcc_dram_bw_pref_overhead_factor_l = 1.0; + + *p->num_mcaches_c = 0; + *p->mcache_row_bytes_c = 0; + *p->dcc_dram_bw_nom_overhead_factor_c = 1.0; + *p->dcc_dram_bw_pref_overhead_factor_c = 1.0; + + *p->mall_comb_mcache_l = 0; + *p->mall_comb_mcache_c = 0; + *p->lc_comb_mcache = 0; + + if (!p->dcc_enable) + return; + + l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha; + + l->l_p.num_chans = p->num_chans; + l->l_p.mem_word_bytes = p->mem_word_bytes; + l->l_p.mcache_size_bytes = p->mcache_size_bytes; + l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes; + l->l_p.gpuvm_enable = p->gpuvm_enable; + l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes; + l->l_p.surf_vert = p->surf_vert; + l->l_p.vp_stationary = p->vp_stationary; + l->l_p.tiling_mode = p->tiling_mode; + l->l_p.vp_start_x = p->vp_start_x_l; + l->l_p.vp_start_y = p->vp_start_y_l; + l->l_p.full_vp_width = p->full_vp_width_l; + l->l_p.full_vp_height = p->full_vp_height_l; + l->l_p.blk_width = p->blk_width_l; + l->l_p.blk_height = p->blk_height_l; + l->l_p.vmpg_width = p->vmpg_width_l; + l->l_p.vmpg_height = p->vmpg_height_l; + l->l_p.full_swath_bytes = p->full_swath_bytes_l; + 
l->l_p.bytes_per_pixel = p->bytes_per_pixel_l; + + // output + l->l_p.num_mcaches = p->num_mcaches_l; + l->l_p.mcache_row_bytes = p->mcache_row_bytes_l; + l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l; + l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l; + l->l_p.mvmpg_width = &l->mvmpg_width_l; + l->l_p.mvmpg_height = &l->mvmpg_height_l; + l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l; + l->l_p.meta_row_width_ub = &l->meta_row_width_l; + l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l; + + calculate_mcache_row_bytes(scratch, &l->l_p); + dml2_assert(*p->num_mcaches_l > 0); + + if (l->is_dual_plane) { + l->c_p.num_chans = p->num_chans; + l->c_p.mem_word_bytes = p->mem_word_bytes; + l->c_p.mcache_size_bytes = p->mcache_size_bytes; + l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes; + l->c_p.gpuvm_enable = p->gpuvm_enable; + l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes; + l->c_p.surf_vert = p->surf_vert; + l->c_p.vp_stationary = p->vp_stationary; + l->c_p.tiling_mode = p->tiling_mode; + l->c_p.vp_start_x = p->vp_start_x_c; + l->c_p.vp_start_y = p->vp_start_y_c; + l->c_p.full_vp_width = p->full_vp_width_c; + l->c_p.full_vp_height = p->full_vp_height_c; + l->c_p.blk_width = p->blk_width_c; + l->c_p.blk_height = p->blk_height_c; + l->c_p.vmpg_width = p->vmpg_width_c; + l->c_p.vmpg_height = p->vmpg_height_c; + l->c_p.full_swath_bytes = p->full_swath_bytes_c; + l->c_p.bytes_per_pixel = p->bytes_per_pixel_c; + + // output + l->c_p.num_mcaches = p->num_mcaches_c; + l->c_p.mcache_row_bytes = p->mcache_row_bytes_c; + l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c; + l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c; + l->c_p.mvmpg_width = &l->mvmpg_width_c; + l->c_p.mvmpg_height = &l->mvmpg_height_c; + l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c; + l->c_p.meta_row_width_ub = &l->meta_row_width_c; + l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c; + + calculate_mcache_row_bytes(scratch, &l->c_p); + dml2_assert(*p->num_mcaches_c > 0); + } + + // Sharing for iMALL access + l->mcache_remainder_l = *p->mcache_row_bytes_l % p->mcache_size_bytes; + l->mcache_remainder_c = *p->mcache_row_bytes_c % p->mcache_size_bytes; + l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l; + l->mvmpg_access_width_c = p->surf_vert ? l->mvmpg_height_c : l->mvmpg_width_c; + + if (p->imall_enable) { + *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes); + + if (l->is_dual_plane) + *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes); + } + + if (!p->surf_vert) // horizonatal access + l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2; + else // vertical access + l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2; + + // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c: + l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l; + if (l->is_dual_plane) { + l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c; + + if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) { + l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) + + (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 
2 : 1))); + } + *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: imall_enable = %u\n", __func__, p->imall_enable); + dml2_printf("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane); + dml2_printf("DML::%s: surf_vert = %u\n", __func__, p->surf_vert); + dml2_printf("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l); + dml2_printf("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l); + dml2_printf("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l); + dml2_printf("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l); + dml2_printf("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l); + dml2_printf("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l); + dml2_printf("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l); + + if (l->is_dual_plane) { + dml2_printf("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c); + dml2_printf("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c); + dml2_printf("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c); + dml2_printf("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor); + dml2_printf("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c); + dml2_printf("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c); + dml2_printf("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c); + dml2_printf("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c); + dml2_printf("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size); + dml2_printf("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache); + } +#endif + // calculate split_coordinate + l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l; + l->full_vp_access_width_c = p->surf_vert ? 
p->full_vp_height_c : p->full_vp_width_c; + + for (n = 0; n < *p->num_mcaches_l - 1; n++) { + p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l; + } + p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l; + + if (l->is_dual_plane) { + for (n = 0; n < *p->num_mcaches_c - 1; n++) { + p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c; + } + p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c; + } +#ifdef __DML_VBA_DEBUG__ + for (n = 0; n < *p->num_mcaches_l; n++) + dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + + if (l->is_dual_plane) { + for (n = 0; n < *p->num_mcaches_c; n++) + dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); + } +#endif + + // Luma/Chroma combine in the last mcache + // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary + if (*p->lc_comb_mcache && l->is_dual_plane) { + for (n = 0; n < *p->num_mcaches_l - 1; n++) + p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l; + p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l; + + for (n = 0; n < *p->num_mcaches_c - 1; n++) + p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c; + p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c; + +#ifdef __DML_VBA_DEBUG__ + for (n = 0; n < *p->num_mcaches_l; n++) + dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + + for (n = 0; n < *p->num_mcaches_c; n++) + dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); +#endif + } + + *p->mcache_shift_granularity_l = l->mvmpg_access_width_l; + *p->mcache_shift_granularity_c = l->mvmpg_access_width_c; +} + +static void calculate_mall_bw_overhead_factor( + double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref + double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref + + // input + const struct dml2_display_cfg *display_cfg, + unsigned int num_active_planes) +{ + for (unsigned int k = 0; k < num_active_planes; ++k) { + mall_prefetch_sdp_overhead_factor[k] = 1.0; + mall_prefetch_dram_overhead_factor[k] = 1.0; + + // SDP - on the return side + if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return + mall_prefetch_sdp_overhead_factor[k] = 1.25; + else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) + mall_prefetch_sdp_overhead_factor[k] = 0.25; + + // DRAM + if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) + mall_prefetch_dram_overhead_factor[k] = 2.0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]); + dml2_printf("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]); +#endif + } +} + +static double dml_get_return_bandwidth_available( + const struct dml2_soc_bb *soc, + enum dml2_core_internal_soc_state_type state_type, + enum dml2_core_internal_bw_type bw_type, + bool 
is_avg_bw, + bool is_hvm_en, + bool is_hvm_only, + double dcflk_mhz, + double fclk_mhz, + double dram_bw_mbps) +{ + double return_bw_mbps = 0.; + double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcflk_mhz; + double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes; + double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes; + + double derate_sdp_factor = 1; + double derate_fabric_factor = 1; + double derate_dram_factor = 1; + + double derate_sdp_bandwidth; + double derate_fabric_bandwidth; + double derate_dram_bandwidth; + + if (is_avg_bw) { + if (state_type == dml2_core_internal_soc_state_svp_prefetch) { + derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0; + } else { // just assume sys_active + derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0; + } + } else { // urgent bw + if (state_type == dml2_core_internal_soc_state_svp_prefetch) { + derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; + + if (is_hvm_en) { + if (is_hvm_only) + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0; + else + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0; + } else { + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; + } + } else { // just assume sys_active + derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0; + + if (is_hvm_en) { + if (is_hvm_only) + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0; + else + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0; + } else { + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0; + } + } + } + + derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor; + derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor; + derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor; + + if (bw_type == dml2_core_internal_bw_sdp) + return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth); + else // dml2_core_internal_bw_dram + return_bw_mbps = derate_dram_bandwidth; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); + dml2_printf("DML::%s: is_hvm_en = %u\n", __func__, 
is_hvm_en); + dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); + dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type)); + dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); + dml2_printf("DML::%s: dcflk_mhz = %f\n", __func__, dcflk_mhz); + dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); + dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); + dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); + dml2_printf("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth); + dml2_printf("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor); + dml2_printf("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor); + dml2_printf("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor); + dml2_printf("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps); +#endif + return return_bw_mbps; +} + +static void calculate_bandwidth_available( + double avg_bandwidth_available_min[dml2_core_internal_soc_state_max], + double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max], + double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max], + + const struct dml2_soc_bb *soc, + bool HostVMEnable, + double dcfclk_mhz, + double fclk_mhz, + double dram_bw_mbps) +{ + unsigned int n, m; + + dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); + dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); + dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps); + + // Calculate all the bandwidth availabe + for (m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (n = 0; n < dml2_core_internal_bw_max; n++) { + avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, + m, // soc_state + n, // bw_type + 1, // avg_bw + HostVMEnable, + 0, // hvm_only + dcfclk_mhz, + fclk_mhz, + dram_bw_mbps); + + urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps); + + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]); + dml2_printf("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]); +#endif + + // urg_bandwidth_available_vm_only is indexed by soc_state + if (n == dml2_core_internal_bw_dram) { + urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps); + urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps); + } + } + + avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]); + urg_bandwidth_available_min[m] = 
math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]); + dml2_printf("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]); + dml2_printf("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[m]); +#endif + } +} + +static void calculate_avg_bandwidth_required( + double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + + // input + const struct dml2_display_cfg *display_cfg, + unsigned int num_active_planes, + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double cursor_bw[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double mall_prefetch_dram_overhead_factor[], + double mall_prefetch_sdp_overhead_factor[]) +{ + unsigned int n, m, k; + double sdp_overhead_factor; + double dram_overhead_factor_p0; + double dram_overhead_factor_p1; + + // Average BW support check + for (m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram + avg_bandwidth_required[m][n] = 0; + } + } + + // SysActive and SVP Prefetch AVG bandwidth Check + for (k = 0; k < num_active_planes; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: plane %0d\n", __func__, k); + dml2_printf("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]); + dml2_printf("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]); + dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]); + dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]); + dml2_printf("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]); + dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]); +#endif + + sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k]; + dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k]; + dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k]; + + // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation? 
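+ // Note: the accumulation below adds, per plane, sdp_overhead_factor * (luma + chroma read BW) + cursor BW to the SDP total, and the DCC/MALL-derated luma and chroma read BW + cursor BW to the DRAM total; the sys_active totals skip phantom (SubVP) pipes while the svp_prefetch totals include them.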
+ // active avg bw not include phantom, but svp_prefetch avg bw should include phantom pipes + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k]; + avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k]; + } + avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k]; + avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); + dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); + dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); + dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); +#endif + } +} + +static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateVMRowAndSwath_params *p) +{ + struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals; + + s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_page_table_levels); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->hostvm_enable == true) { + p->vm_group_bytes[k] = 512; + p->dpte_group_bytes[k] = 512; + } else if (p->display_cfg->gpuvm_enable == true) { + p->vm_group_bytes[k] = 2048; + if (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes >= 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) { + p->dpte_group_bytes[k] = 512; + } else { + p->dpte_group_bytes[k] = 2048; + } + } else { + p->vm_group_bytes[k] = 0; + p->dpte_group_bytes[k] = 0; + } + + if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) { + if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) { + s->PTEBufferSizeInRequestsForLuma[k] = 
(p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2; + s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k]; + } else { + s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma; + s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma; + } + + scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; + scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; + scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; + scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC; + scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC; + scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; + scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; + scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC; + scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; + scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k]; + scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC; + scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC; + scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC; + scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; + scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; + scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k]; + scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC; + scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC; + scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC; + scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); + scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC; + scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; + + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_height = 
&p->vmpg_height_c[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k]; + scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k]; + scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k]; + + scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k]; + + s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); + + p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( + p->myPipe[k].VRatioChroma, + p->myPipe[k].VTapsChroma, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightC, + p->myPipe[k].RotationAngle, + p->myPipe[k].mirrored, + p->myPipe[k].ViewportStationary, + p->SwathWidthC[k], + p->myPipe[k].ViewportHeightC, + p->myPipe[k].ViewportXStartC, + p->myPipe[k].ViewportYStartC, + + // Output + &p->VInitPreFillC[k], + &p->MaxNumSwathC[k]); + } else { + s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma; + s->PTEBufferSizeInRequestsForChroma[k] = 0; + s->PixelPTEBytesPerRowC[k] = 0; + s->PixelPTEBytesPerRowStorageC[k] = 0; + s->vm_bytes_c = 0; + p->MaxNumSwathC[k] = 0; + p->PrefetchSourceLinesC[k] = 0; + s->dpte_row_height_chroma_one_row_per_frame[k] = 0; + s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + } + + scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; + scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; + scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; + scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY; + scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY; + scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; + scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; + scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY; + scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; + scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k]; + scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight; + scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart; + scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart; + scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; + scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; + scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = 
p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k]; + scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY; + scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthY; + scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY; + scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); + scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY; + scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; + + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k]; + scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k]; + scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k]; + + scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k]; + + s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); + + p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + p->myPipe[k].VRatio, + p->myPipe[k].VTaps, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightY, + p->myPipe[k].RotationAngle, + p->myPipe[k].mirrored, + p->myPipe[k].ViewportStationary, + p->SwathWidthY[k], + p->myPipe[k].ViewportHeight, + p->myPipe[k].ViewportXStart, + p->myPipe[k].ViewportYStart, + + // Output + &p->VInitPreFillY[k], + &p->MaxNumSwathY[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); + dml2_printf("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); + dml2_printf("DML::%s: k=%u, 
meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]); + dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); +#endif + p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels); + p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); + dml2_printf("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); +#endif + if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { + p->PTEBufferSizeNotExceeded[k] = true; + } else { + p->PTEBufferSizeNotExceeded[k] = false; + } + + s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] && + s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]); +#ifdef __DML_VBA_DEBUG__ + if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) { + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); + dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); + dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); + dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + + dml2_printf("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); + dml2_printf("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); + } +#endif + } + + CalculateMALLUseForStaticScreen( + p->display_cfg, + p->NumberOfActiveSurfaces, + p->MALLAllocatedForDCN, + p->SurfaceSizeInMALL, + s->one_row_per_frame_fits_in_buffer, + // Output + p->is_using_mall_for_ss); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->gpuvm_enable) { + if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) { + p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value; + } + p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || + dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64); + p->BIGK_FRAGMENT_SIZE[k] = (unsigned 
int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12); + } else { + p->PTE_BUFFER_MODE[k] = 0; + p->BIGK_FRAGMENT_SIZE[k] = 0; + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DCCMetaBufferSizeNotExceeded[k] = true; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); + dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]); +#endif + p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || + (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)); + + p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame); + + if (p->use_one_row_for_frame[k]) { + p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k]; + p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k]; + s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k]; + p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k]; + p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k]; + s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k]; + p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k]; + } + + if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) { + p->DCCMetaBufferSizeNotExceeded[k] = true; + } else { + p->DCCMetaBufferSizeNotExceeded[k] = false; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]); + dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes); + dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); +#endif + } + + s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels); + s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels); + p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k]; + + // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend like that one big row is fetched in two halfs + if (p->use_one_row_for_frame[k]) + p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2; + + CalculateRowBandwidth( + p->display_cfg->gpuvm_enable, + p->use_one_row_for_frame[k], + p->myPipe[k].SourcePixelFormat, + p->myPipe[k].VRatio, + p->myPipe[k].VRatioChroma, + p->myPipe[k].DCCEnable, + p->myPipe[k].HTotal / p->myPipe[k].PixelClock, + s->PixelPTEBytesPerRowY[k], + s->PixelPTEBytesPerRowC[k], + p->dpte_row_height_luma[k], + p->dpte_row_height_chroma[k], + + p->mrq_present, + s->meta_row_bytes_per_row_ub_l[k], + s->meta_row_bytes_per_row_ub_c[k], + p->meta_row_height_luma[k], + p->meta_row_height_chroma[k], + + // Output + &p->dpte_row_bw[k], + &p->meta_row_bw[k]); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + dml2_printf("DML::%s: k=%u, 
use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); + dml2_printf("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); + dml2_printf("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); + dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + dml2_printf("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); + dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); + dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + dml2_printf("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); + dml2_printf("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); + dml2_printf("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); +#endif + } +} + +static double CalculateUrgentLatency( + double UrgentLatencyPixelDataOnly, + double UrgentLatencyPixelMixedWithVMData, + double UrgentLatencyVMDataOnly, + bool DoUrgentLatencyAdjustment, + double UrgentLatencyAdjustmentFabricClockComponent, + double UrgentLatencyAdjustmentFabricClockReference, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int urgent_ramp_uclk_cycles, + unsigned int df_qos_response_time_fclk_cycles, + unsigned int max_round_trip_to_furthest_cs_fclk_cycles, + unsigned int mall_overhead_fclk_cycles, + double umc_urgent_ramp_latency_margin, + double fabric_max_transport_latency_margin) +{ + double urgent_latency = 0; + if (qos_type == dml2_qos_param_type_dcn4) { + urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock + + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) + + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); + } else { + urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); + if (DoUrgentLatencyAdjustment == true) { + urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); + } + } +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4) { + dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); + dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); + dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + dml2_printf("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); + } else { + dml2_printf("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); + dml2_printf("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); + dml2_printf("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly); + dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockComponent 
= %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); + dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); + } + dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock); + dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); +#endif + return urgent_latency; +} + +static double CalculateTripToMemory( + double UrgLatency, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int trip_to_memory_uclk_cycles, + unsigned int max_round_trip_to_furthest_cs_fclk_cycles, + unsigned int mall_overhead_fclk_cycles, + double umc_max_latency_margin, + double fabric_max_transport_latency_margin) +{ + double trip_to_memory_us; + if (qos_type == dml2_qos_param_type_dcn4) { + trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock + + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); + } else { + trip_to_memory_us = UrgLatency; + } + +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4) { + dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); + dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); + dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); + dml2_printf("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); + dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock); + dml2_printf("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); + dml2_printf("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); + } else { + dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + } + dml2_printf("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); +#endif + + + return trip_to_memory_us; +} + +static double CalculateMetaTripToMemory( + double UrgLatency, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int meta_trip_to_memory_uclk_cycles, + unsigned int meta_trip_to_memory_fclk_cycles, + double umc_max_latency_margin, + double fabric_max_transport_latency_margin) +{ + double meta_trip_to_memory_us; + if (qos_type == dml2_qos_param_type_dcn4) { + meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); + } else { + meta_trip_to_memory_us = UrgLatency; + } + +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4) { + dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); + dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); + dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); + dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + } else { + dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + } + dml2_printf("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); +#endif + + + return meta_trip_to_memory_us; +} + +static void calculate_cursor_req_attributes( + unsigned int 
cursor_width, + unsigned int cursor_bpp, + + // output + unsigned int *cursor_lines_per_chunk, + unsigned int *cursor_bytes_per_line, + unsigned int *cursor_bytes_per_chunk, + unsigned int *cursor_bytes) +{ + unsigned int cursor_pitch = 0; + unsigned int cursor_bytes_per_req = 0; + unsigned int cursor_width_bytes = 0; + unsigned int cursor_height = 0; + + //SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply. + //- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B + //- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width + if (cursor_bpp == 2) + cursor_pitch = 256; + else + cursor_pitch = (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1); + + //The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows. + + cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1); + if (cursor_width_bytes <= 64) + cursor_bytes_per_req = 64; + else if (cursor_width_bytes <= 128) + cursor_bytes_per_req = 128; + else + cursor_bytes_per_req = 256; + + //If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line. + *cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req); + + //Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines. + if (cursor_bpp == 2) { + *cursor_lines_per_chunk = 16; + } else if (cursor_bpp == 32) { + if (cursor_width <= 32) + *cursor_lines_per_chunk = 16; + else if (cursor_width <= 64) + *cursor_lines_per_chunk = 8; + else if (cursor_width <= 128) + *cursor_lines_per_chunk = 4; + else + *cursor_lines_per_chunk = 2; + } else if (cursor_bpp == 64) { + if (cursor_width <= 16) + *cursor_lines_per_chunk = 16; + else if (cursor_width <= 32) + *cursor_lines_per_chunk = 8; + else if (cursor_width <= 64) + *cursor_lines_per_chunk = 4; + else if (cursor_width <= 128) + *cursor_lines_per_chunk = 2; + else + *cursor_lines_per_chunk = 1; + } else { + if (cursor_width > 0) { + dml2_printf("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); + dml2_assert(0); + } + } + + *cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk; + + // For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize. 
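+ // Worked example (values follow directly from the sizing rules above): a 64-wide, 32bpp cursor gives cursor_width_bytes = 64 * 4 = 256, hence cursor_bytes_per_req = 256 and cursor_bytes_per_line = 256; with 8 lines per chunk, cursor_bytes_per_chunk = 2048.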
+ // Only cursor_width is provided for worst case sizing so assume that the cursor is square + cursor_height = cursor_width; + *cursor_bytes = *cursor_bytes_per_line * cursor_height; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); + dml2_printf("DML::%s: cursor_width = %d\n", __func__, cursor_width); + dml2_printf("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); + dml2_printf("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); + dml2_printf("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); + dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); + dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); + dml2_printf("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); + dml2_printf("DML::%s: cursor_pitch = %d\n", __func__, cursor_pitch); +#endif + + // register CURSOR_PITCH = math_log2(cursor_pitch) - 6; + // register CURSOR_LINES_PER_CHUNK = math_log2(*cursor_lines_per_chunk); +} + +static void calculate_cursor_urgent_burst_factor( + unsigned int CursorBufferSize, + unsigned int CursorWidth, + unsigned int cursor_bytes_per_chunk, + unsigned int cursor_lines_per_chunk, + double LineTime, + double UrgentLatency, + + double *UrgentBurstFactorCursor, + bool *NotEnoughUrgentLatencyHiding) +{ + unsigned int LinesInCursorBuffer = 0; + double CursorBufferSizeInTime = 0; + + if (CursorWidth > 0) { + LinesInCursorBuffer = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1) * cursor_lines_per_chunk; + + CursorBufferSizeInTime = LinesInCursorBuffer * LineTime; + if (CursorBufferSizeInTime - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorCursor = 0; + } else { + *NotEnoughUrgentLatencyHiding = 0; + *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); + dml2_printf("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); + dml2_printf("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); + dml2_printf("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); + dml2_printf("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); + dml2_printf("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); + dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); +#endif + + } +} + +static void CalculateUrgentBurstFactor( + const struct dml2_plane_parameters *plane_cfg, + unsigned int swath_width_luma_ub, + unsigned int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double LineTime, + double UrgentLatency, + double VRatio, + double VRatioC, + double BytePerPixelInDETY, + double BytePerPixelInDETC, + unsigned int DETBufferSizeY, + unsigned int DETBufferSizeC, + // Output + double *UrgentBurstFactorLuma, + double *UrgentBurstFactorChroma, + bool *NotEnoughUrgentLatencyHiding) +{ + double LinesInDETLuma; + double LinesInDETChroma; + double DETBufferSizeInTimeLuma; + double DETBufferSizeInTimeChroma; + + *NotEnoughUrgentLatencyHiding = 0; + *UrgentBurstFactorLuma = 0; + *UrgentBurstFactorChroma = 0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); + dml2_printf("DML::%s: VRatioC = %f\n", 
__func__, VRatioC); + dml2_printf("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); + dml2_printf("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); + dml2_printf("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); + dml2_printf("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); + dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime); +#endif + DML2_ASSERT(VRatio > 0); + + LinesInDETLuma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; + + DETBufferSizeInTimeLuma = math_floor2(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; + if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorLuma = 0; + } else { + *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); + } + + if (BytePerPixelInDETC > 0) { + LinesInDETChroma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub; + + DETBufferSizeInTimeChroma = math_floor2(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC; + if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorChroma = 0; + } else { + *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); + dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + dml2_printf("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); + dml2_printf("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); + dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); + dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); +#endif + +} + +static void CalculateDCFCLKDeepSleep( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + unsigned int DPPPerSurface[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int ReturnBusWidth, + + // Output + double *DCFClkDeepSleep) +{ + double DisplayPipeLineDeliveryTimeLuma; + double DisplayPipeLineDeliveryTimeChroma; + double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES]; + double ReadBandwidth = 0.0; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz; + } else { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma = 0; + } else { + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz; + } else { + 
DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } + + if (BytePerPixelC[k] > 0) { + DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, + __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); + } else { + DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; + } + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); + dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); +#endif + } + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } + + *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); + dml2_printf("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); + dml2_printf("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); + dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); +#endif + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); + } + dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); +} + +static double CalculateWriteBackDelay( + enum dml2_source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackVTaps, + unsigned int WritebackDestinationWidth, + unsigned int WritebackDestinationHeight, + unsigned int WritebackSourceHeight, + unsigned int HTotal) +{ + double CalculateWriteBackDelay; + double Line_length; + double Output_lines_last_notclamped; + double WritebackVInit; + + WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; + Line_length = math_max2((double)WritebackDestinationWidth, math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); + Output_lines_last_notclamped = WritebackDestinationHeight - 1 - math_ceil2(((double)WritebackSourceHeight - (double)WritebackVInit) / (double)WritebackVRatio, 1.0); + if (Output_lines_last_notclamped < 0) { + CalculateWriteBackDelay = 0; + } else { + CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; + } + return CalculateWriteBackDelay; +} + +static unsigned int CalculateMaxVStartup( + bool ptoi_supported, + unsigned int vblank_nom_default_us, + const struct dml2_timing_cfg *timing, + double write_back_delay_us) +{ + unsigned int vblank_size = 0; + unsigned int max_vstartup_lines = 0; + + double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000); + unsigned int vblank_actual = timing->v_total - timing->v_active; + unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0); + unsigned int vblank_nom_input = (unsigned int)math_min2(timing->vblank_nom, vblank_nom_default_in_line); + unsigned int vblank_avail = (vblank_nom_input == 0) ? 
vblank_nom_default_in_line : vblank_nom_input; + + vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail); + + if (timing->interlaced && !ptoi_supported) + max_vstartup_lines = (unsigned int)(math_floor2(vblank_size / 2.0, 1.0)); + else + max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0)); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VBlankNom = %u\n", __func__, timing->vblank_nom); + dml2_printf("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); + dml2_printf("DML::%s: line_time_us = %f\n", __func__, line_time_us); + dml2_printf("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); + dml2_printf("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); + dml2_printf("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); +#endif + return max_vstartup_lines; +} + +static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p) +{ + unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 }; + unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 }; + unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 }; + unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 }; + unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 }; + unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 }; + + unsigned int TotalActiveDPP = 0; + bool NoChromaOrLinear = true; + unsigned int SurfaceDoingUnboundedRequest = 0; + unsigned int DETBufferSizeInKByteForSwathCalculation; + + const long TTUFIFODEPTH = 8; + const long MAXIMUMCOMPRESSION = 4; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + dml2_printf("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); + } +#endif + CalculateSwathWidth( + p->display_cfg, + p->ForceSingleDPP, + p->NumberOfActiveSurfaces, + p->ODMMode, + p->BytePerPixY, + p->BytePerPixC, + p->Read256BytesBlockHeightY, + p->Read256BytesBlockHeightC, + p->Read256BytesBlockWidthY, + p->Read256BytesBlockWidthC, + p->surf_linear128_l, + p->surf_linear128_c, + p->DPPPerSurface, + + // Output + p->req_per_swath_ub_l, + p->req_per_swath_ub_c, + SwathWidthSingleDPP, + SwathWidthSingleDPPChroma, + p->SwathWidth, + p->SwathWidthChroma, + MaximumSwathHeightY, + MaximumSwathHeightC, + p->swath_width_luma_ub, + p->swath_width_chroma_ub); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]); + p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); + dml2_printf("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); + dml2_printf("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); + dml2_printf("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); + dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + dml2_printf("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); + dml2_printf("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); + 
dml2_printf("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); + dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); +#endif + if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) { + p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256)); + p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256)); + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]); + if (p->DPPPerSurface[k] > 0) + SurfaceDoingUnboundedRequest = k; + if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha + || p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { + NoChromaOrLinear = false; + } + } + + *p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear); + + CalculateDETBufferSize( + &scratch->CalculateDETBufferSize_locals, + p->display_cfg, + p->ForceSingleDPP, + p->NumberOfActiveSurfaces, + *p->UnboundedRequestEnabled, + p->nomDETInKByte, + p->MaxTotalDETInKByte, + p->ConfigReturnBufferSizeInKByte, + p->MinCompressedBufferSizeInKByte, + p->ConfigReturnBufferSegmentSizeInkByte, + p->CompressedBufferSegmentSizeInkByte, + p->ReadBandwidthLuma, + p->ReadBandwidthChroma, + p->full_swath_bytes_l, + p->full_swath_bytes_c, + p->DPPPerSurface, + + // Output + p->DETBufferSizeInKByte, // per hubp pipe + p->CompressedBufferSizeInkByte); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); + dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); + dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); + dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); + dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); +#endif + + *p->ViewportSizeSupport = true; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + + DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 
1024 : p->DETBufferSizeInKByte[k]); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); +#endif + + if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k]; + p->SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; + p->request_size_bytes_luma[k] = 256; + p->request_size_bytes_chroma[k] = 256; + + } else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + p->SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = 256; + + } else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = MaximumSwathHeightY[k]; + p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; + p->request_size_bytes_luma[k] = 256; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + + } else { + p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; + RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 
128 : 64;; + } + + if (p->SwathHeightC[k] == 0) + p->request_size_bytes_chroma[k] = 0; + + if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || + p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) { + *p->ViewportSizeSupport = false; + p->ViewportSizeSupportPerSurface[k] = false; + } else { + p->ViewportSizeSupportPerSurface[k] = true; + } + + if (p->SwathHeightC[k] == 0) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); +#endif + p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024; + p->DETBufferSizeC[k] = 0; + } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); +#endif + p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; + p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; + } else { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k); +#endif + p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024)); + p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + dml2_printf("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); + dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); + dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); + dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); + dml2_printf("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + dml2_printf("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); + dml2_printf("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); +#endif + + } + + *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64; + if (*p->UnboundedRequestEnabled) { + *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, + (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / 64)), 1.0); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); + dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); +#endif + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); +#endif + + *p->hw_debug5 = false; + if (!p->mrq_present) { + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!(*p->UnboundedRequestEnabled) + && p->display_cfg->plane_descriptors[k].surface.dcc.enable + && ((p->rob_buffer_size_kbytes * 1024 + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * 
(RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) + *p->hw_debug5 = true; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); + dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); + dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); + dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); + dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); +#endif + } + } +} + +static void CalculateODMMode( + unsigned int MaximumPixelsPerLinePerDSCUnit, + unsigned int HActive, + enum dml2_output_format_class OutFormat, + enum dml2_output_encoder_class Output, + enum dml2_odm_mode ODMUse, + double MaxDispclk, + bool DSCEnable, + unsigned int TotalNumberOfActiveDPP, + unsigned int MaxNumDPP, + double PixelClock, + unsigned int NumberOfDSCSlices, + + // Output + bool *TotalAvailablePipesSupport, + unsigned int *NumberOfDPP, + enum dml2_odm_mode *ODMMode, + double *RequiredDISPCLKPerSurface) +{ + double SurfaceRequiredDISPCLKWithoutODMCombine; + double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + double SurfaceRequiredDISPCLKWithODMCombineThreeToOne; + double SurfaceRequiredDISPCLKWithODMCombineFourToOne; + + SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock); + SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock); + SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock); + SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock); + *TotalAvailablePipesSupport = true; + + if (OutFormat == dml2_420) { + if (HActive > 4 * DML2_MAX_FMT_420_BUFFER_WIDTH) + *TotalAvailablePipesSupport = false; + else if (HActive > 3 * DML2_MAX_FMT_420_BUFFER_WIDTH) + ODMUse = dml2_odm_mode_combine_4to1; + else if (HActive > 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) + ODMUse = dml2_odm_mode_combine_3to1; + else if (HActive > DML2_MAX_FMT_420_BUFFER_WIDTH) + ODMUse = dml2_odm_mode_combine_2to1; + if (Output == dml2_hdmi && ODMUse == dml2_odm_mode_combine_2to1) + *TotalAvailablePipesSupport = false; + if (Output == dml2_hdmi && ODMUse == dml2_odm_mode_combine_3to1) + *TotalAvailablePipesSupport = false; + if (Output == dml2_hdmi && ODMUse == dml2_odm_mode_combine_4to1) + *TotalAvailablePipesSupport = false; + } + + if (ODMUse == dml2_odm_mode_bypass || ODMUse == dml2_odm_mode_auto) + *ODMMode = dml2_odm_mode_bypass; + else if (ODMUse == dml2_odm_mode_combine_2to1) + *ODMMode = dml2_odm_mode_combine_2to1; + else if (ODMUse == dml2_odm_mode_combine_3to1) + *ODMMode = dml2_odm_mode_combine_3to1; + else if (ODMUse == dml2_odm_mode_combine_4to1) + *ODMMode = dml2_odm_mode_combine_4to1; + else if (ODMUse == dml2_odm_mode_split_1to2) + *ODMMode = dml2_odm_mode_split_1to2; + else if (ODMUse == dml2_odm_mode_mso_1to2) + *ODMMode = dml2_odm_mode_mso_1to2; + else if (ODMUse == dml2_odm_mode_mso_1to4) + *ODMMode = dml2_odm_mode_mso_1to4; + + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; + *NumberOfDPP = 0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: ODMUse = %d\n", __func__, ODMUse); + dml2_printf("DML::%s: Output = %d\n", __func__, Output); + 
dml2_printf("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); + dml2_printf("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); + dml2_printf("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); + dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); + dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); + dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); + dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); +#endif + + if (ODMUse == dml2_odm_mode_combine_4to1 || (ODMUse == dml2_odm_mode_auto && + (SurfaceRequiredDISPCLKWithODMCombineThreeToOne > MaxDispclk || + (DSCEnable && ((NumberOfDSCSlices % 4 == 0) && ((HActive > 3 * MaximumPixelsPerLinePerDSCUnit) || NumberOfDSCSlices > 8)))))) { + if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { + *ODMMode = dml2_odm_mode_combine_4to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; + *NumberOfDPP = 4; + } else { + *TotalAvailablePipesSupport = false; + } + } else if (ODMUse == dml2_odm_mode_combine_3to1 || (ODMUse == dml2_odm_mode_auto && + ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > MaxDispclk && SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) || + (DSCEnable && ((NumberOfDSCSlices % 3 == 0) && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) { + if (TotalNumberOfActiveDPP + 3 <= MaxNumDPP) { + *ODMMode = dml2_odm_mode_combine_3to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineThreeToOne; + *NumberOfDPP = 3; + } else { + *TotalAvailablePipesSupport = false; + } + + } else if (ODMUse == dml2_odm_mode_combine_2to1 || (ODMUse == dml2_odm_mode_auto && + ((SurfaceRequiredDISPCLKWithoutODMCombine > MaxDispclk && SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) || + (DSCEnable && ((NumberOfDSCSlices % 2 == 0) && ((HActive > MaximumPixelsPerLinePerDSCUnit) || (NumberOfDSCSlices > 4 && NumberOfDSCSlices <= 8))))))) { + if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { + *ODMMode = dml2_odm_mode_combine_2to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + *NumberOfDPP = 2; + } else { + *TotalAvailablePipesSupport = false; + } + + } else { + if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) { + *NumberOfDPP = 1; + } else { + *TotalAvailablePipesSupport = false; + } + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: ODMMode = %d\n", __func__, *ODMMode); + dml2_printf("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); + dml2_printf("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); + dml2_printf("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); +#endif + +} + +static void CalculateOutputLink( + struct dml2_core_internal_scratch *s, + double PHYCLK, + double PHYCLKD18, + double PHYCLKD32, + double Downspreading, + bool IsMainSurfaceUsingTheIndicatedTiming, + enum dml2_output_encoder_class Output, + enum dml2_output_format_class OutputFormat, + unsigned int HTotal, + unsigned int HActive, + double PixelClockBackEnd, + double ForcedOutputLinkBPP, + unsigned int DSCInputBitPerComponent, + unsigned int NumberOfDSCSlices, + double AudioSampleRate, + unsigned int AudioSampleLayout, + enum dml2_odm_mode ODMModeNoDSC, + enum 
dml2_odm_mode ODMModeDSC, + enum dml2_dsc_enable_option DSCEnable, + unsigned int OutputLinkDPLanes, + enum dml2_output_link_dp_rate OutputLinkDPRate, + + // Output + bool *RequiresDSC, + bool *RequiresFEC, + double *OutBpp, + enum dml2_core_internal_output_type *OutputType, + enum dml2_core_internal_output_type_rate *OutputRate, + unsigned int *RequiredSlots) +{ + bool LinkDSCEnable; + unsigned int dummy; + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = 0; + + *OutputType = dml2_core_internal_output_type_unknown; + *OutputRate = dml2_core_internal_output_rate_unknown; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); + dml2_printf("DML::%s: IsMainSurfaceUsingTheIndicatedTiming = %u\n", __func__, IsMainSurfaceUsingTheIndicatedTiming); + dml2_printf("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); + dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + dml2_printf("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate); + dml2_printf("DML::%s: HActive = %u\n", __func__, HActive); + dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); + dml2_printf("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); + dml2_printf("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); + dml2_printf("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); + dml2_printf("DML::%s: Output (encoder) = %u\n", __func__, Output); + dml2_printf("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); +#endif + if (IsMainSurfaceUsingTheIndicatedTiming) { + if (Output == dml2_hdmi) { + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = "HDMI"; + *OutputType = dml2_core_internal_output_type_hdmi; + } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) { + if (DSCEnable == dml2_dsc_enable) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp || Output == dml2_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } else { + *RequiresDSC = false; + LinkDSCEnable = false; + if (Output == dml2_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } + if (Output == dml2_dp2p0) { + *OutBpp = 0; + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLKD32 < 13500 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR10"; + 
*OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLKD32 < 20000 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR13p5"; + *OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR20"; + *OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20; + } + } else { // output is dp or edp + *OutBpp = 0; + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR"; + *OutputType = (Output == dml2_dp) ? 
dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR2"; + *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR3"; + *OutputType = (Output == dml2_dp) ? 
dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3; + } + } + } else if (Output == dml2_hdmifrl) { + if (DSCEnable == dml2_dsc_enable) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + } else { + *RequiresDSC = false; + LinkDSCEnable = false; + *RequiresFEC = false; + } + *OutBpp = 0; + if (PHYCLKD18 >= 3000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "3x3"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3; + } + if (*OutBpp == 0 && PHYCLKD18 >= 6000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "6x3"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3; + } + if (*OutBpp == 0 && PHYCLKD18 >= 6000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "6x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 8000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "8x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 10000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000 / 18) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + } + //OutputTypeAndRate = Output & "10x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 12000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 
OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + } + //OutputTypeAndRate = Output & "12x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4; + } + } + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); + dml2_printf("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); + dml2_printf("DML::%s: OutBpp = %f\n", __func__, *OutBpp); +#endif +} + +static double CalculateWriteBackDISPCLK( + enum dml2_source_format_class WritebackPixelFormat, + double PixelClock, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackHTaps, + unsigned int WritebackVTaps, + unsigned int WritebackSourceWidth, + unsigned int WritebackDestinationWidth, + unsigned int HTotal, + unsigned int WritebackLineBufferSize) +{ + double DISPCLK_H, DISPCLK_V, DISPCLK_HB; + + DISPCLK_H = PixelClock * math_ceil2((double)WritebackHTaps / 8.0, 1) / WritebackHRatio; + DISPCLK_V = PixelClock * (WritebackVTaps * math_ceil2((double)WritebackDestinationWidth / 6.0, 1) + 8.0) / (double)HTotal; + DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (double)WritebackSourceWidth; + return math_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); +} + +static double RequiredDTBCLK( + bool DSCEnable, + double PixelClock, + enum dml2_output_format_class OutputFormat, + double OutputBpp, + unsigned int DSCSlices, + unsigned int HTotal, + unsigned int HActive, + unsigned int AudioRate, + unsigned int AudioLayout) +{ + if (DSCEnable != true) { + return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); + } else { + double PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2); + double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); + double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); + double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + double HActiveTribyteRate = PixelWordRate * HCActive / HActive; + return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; + } +} + +static unsigned int DSCDelayRequirement( + bool DSCEnabled, + enum dml2_odm_mode ODMMode, + unsigned int DSCInputBitPerComponent, + double OutputBpp, + unsigned int HActive, + unsigned int HTotal, + unsigned int NumberOfDSCSlices, + enum dml2_output_format_class OutputFormat, + enum dml2_output_encoder_class Output, + double PixelClock, + double PixelClockBackEnd) +{ + unsigned int DSCDelayRequirement_val = 0; + unsigned int NumberOfDSCSlicesFactor = 1; + + if (DSCEnabled == true && OutputBpp != 0) { + + if (ODMMode == dml2_odm_mode_combine_4to1) + NumberOfDSCSlicesFactor = 4; + else if (ODMMode == dml2_odm_mode_combine_3to1) + NumberOfDSCSlicesFactor = 3; + else if (ODMMode == dml2_odm_mode_combine_2to1) + NumberOfDSCSlicesFactor = 2; + + DSCDelayRequirement_val = NumberOfDSCSlicesFactor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0)), + (NumberOfDSCSlices / NumberOfDSCSlicesFactor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output)); + + DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val + (HTotal - HActive) * math_ceil2((double)DSCDelayRequirement_val / (double)HActive, 1.0)); + DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd); + + } else { + DSCDelayRequirement_val = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); + dml2_printf("DML::%s: ODMMode = %u\n", __func__, ODMMode); + dml2_printf("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); + dml2_printf("DML::%s: HActive = %u\n", __func__, HActive); + dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); + dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock); + dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + dml2_printf("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); + dml2_printf("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); + dml2_printf("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); + dml2_printf("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); +#endif + + return DSCDelayRequirement_val; +} + +static void CalculateSurfaceSizeInMall( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + unsigned int BytesPerPixelY[], + unsigned int BytesPerPixelC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int ReadBlockWidthY[], + unsigned int ReadBlockWidthC[], + unsigned int ReadBlockHeightY[], + unsigned int ReadBlockHeightC[], + + // Output + unsigned int SurfaceSizeInMALL[], + bool *ExceededMALLSize) +{ + unsigned int TotalSurfaceSizeInMALLForSS = 0; + unsigned int TotalSurfaceSizeInMALLForSubVP = 0; + unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition; + const struct dml2_surface_cfg *surface = 
&display_cfg->plane_descriptors[k].surface; + + if (composition->viewport.stationary) { + SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]), + math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - + math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) * + math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]), + math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - + math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]); + + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]), + math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - + math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) * + math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]), + math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - + math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]); + } + if (surface->dcc.enable) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_min2(math_ceil2(surface->plane0.width, 8 * Read256BytesBlockWidthY[k]), + math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + 8 * Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) - + math_floor2(composition->viewport.plane0.x_start, 8 * Read256BytesBlockWidthY[k])) * + math_min2(math_ceil2(surface->plane0.height, 8 * Read256BytesBlockHeightY[k]), + math_floor2(composition->viewport.plane0.y_start + composition->viewport.plane0.height + 8 * Read256BytesBlockHeightY[k] - 1, 8 * Read256BytesBlockHeightY[k]) - + math_floor2(composition->viewport.plane0.y_start, 8 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024); + if (Read256BytesBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_min2(math_ceil2(surface->plane1.width, 8 * Read256BytesBlockWidthC[k]), + math_floor2(composition->viewport.plane1.y_start + composition->viewport.plane1.width + 8 * Read256BytesBlockWidthC[k] - 1, 8 * Read256BytesBlockWidthC[k]) - + math_floor2(composition->viewport.plane1.y_start, 8 * Read256BytesBlockWidthC[k])) * + math_min2(math_ceil2(surface->plane1.height, 8 * Read256BytesBlockHeightC[k]), + math_floor2(composition->viewport.plane1.y_start + composition->viewport.plane1.height + 8 * Read256BytesBlockHeightC[k] - 1, 8 * Read256BytesBlockHeightC[k]) - + math_floor2(composition->viewport.plane1.y_start, 8 * Read256BytesBlockHeightC[k])) * BytesPerPixelC[k] / 256); + } + } + } else { + SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * + math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]); + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_ceil2(math_min2(surface->plane1.width, 
composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * + math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]); + } + if (surface->dcc.enable) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + 8 * Read256BytesBlockWidthY[k] - 1), 8 * Read256BytesBlockWidthY[k]) * + math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + 8 * Read256BytesBlockHeightY[k] - 1), 8 * Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024); + + if (Read256BytesBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + 8 * Read256BytesBlockWidthC[k] - 1), 8 * Read256BytesBlockWidthC[k]) * + math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + 8 * Read256BytesBlockHeightC[k] - 1), 8 * Read256BytesBlockHeightC[k]) * BytesPerPixelC[k] / 256); + } + } + } + } + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + /* SS and Subvp counted separate as they are never used at the same time */ + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) + TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k]; + else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable) + TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k]; + } + + *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) || + (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); + dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); + dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); + dml2_printf("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); +#endif +} + +static void calculate_tdlut_setting( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_calculate_tdlut_setting_params *p) +{ + // locals + unsigned int tdlut_bpe = 8; + unsigned int tdlut_width; + unsigned int tdlut_pitch_bytes; + unsigned int tdlut_footprint_bytes; + unsigned int vmpg_bytes; + unsigned int tdlut_vmpg_per_frame; + unsigned int tdlut_pte_req_per_frame; + unsigned int tdlut_bytes_per_line; + unsigned int tdlut_delivery_cycles; + double tdlut_drain_rate; + unsigned int tdlut_mpc_width; + unsigned int tdlut_bytes_per_group_simple; + + if (!p->setup_for_tdlut) { + *p->tdlut_groups_per_2row_ub = 0; + *p->tdlut_opt_time = 0; + *p->tdlut_drain_time = 0; + *p->tdlut_bytes_per_group = 0; + *p->tdlut_pte_bytes_per_frame = 0; + *p->tdlut_bytes_per_frame = 0; + return; + } + + + if (!p->setup_for_tdlut) { + *p->tdlut_groups_per_2row_ub = 0; + *p->tdlut_opt_time = 0; + *p->tdlut_drain_time = 0; + *p->tdlut_bytes_per_group = 0; + return; + } + + if (p->tdlut_mpc_width_flag) { + tdlut_mpc_width = 33; + tdlut_bytes_per_group_simple = 39*256; + } else { + tdlut_mpc_width = 17; + tdlut_bytes_per_group_simple = 10*256; + } + + vmpg_bytes = p->gpuvm_page_size_kbytes * 1024; + + if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) { + if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) + tdlut_width = 4916; + else + tdlut_width = 35940; 
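+ // Descriptive note on the constants above (not in the original patch): in simple_linear addressing tdlut_width counts LUT entries, i.e. 17^3 = 4913 rounded up to 4916 (4 * 1229) and 33^3 = 35937 rounded up to 35940.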
+ } else { + if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) + tdlut_width = 17; + else // dml2_tdlut_width_33_cube + tdlut_width = 33; + } + + if (p->is_gfx11) + tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment + else + tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment + + if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) + tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width; + else + tdlut_footprint_bytes = tdlut_pitch_bytes; + + if (!p->gpuvm_enable) { + tdlut_vmpg_per_frame = 0; + tdlut_pte_req_per_frame = 0; + } else { + tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1; + tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1; + } + tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request + *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64; + + if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) { + //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice + *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width; + *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; + //the delivery cycles is DispClk cycles per line * number of lines * number of slices + tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / 9.0; + } else { + //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements + *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); + *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple; + tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1); + tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz; + } + + //the tdlut is fetched during the 2 row times of prefetch. 
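+ // Descriptive note (not in the original patch): tdlut_opt_time covers (tdlut_bytes_per_frame - cursor_buffer_size * 1024) bytes and tdlut_drain_time covers the final cursor_buffer_size KB, both at tdlut_drain_rate.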
+ if (p->setup_for_tdlut) { + *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2(*p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); + *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); + dml2_printf("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); + dml2_printf("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); + dml2_printf("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); + + dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); + dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); + dml2_printf("DML::%s: tdlut_addressing_mode = %u\n", __func__, p->tdlut_addressing_mode); + dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); + dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); + dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); + dml2_printf("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); + dml2_printf("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); + dml2_printf("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); + dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles); + dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); + dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); + dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); +#endif +} + +static void CalculateTarb( + const struct dml2_display_cfg *display_cfg, + unsigned int PixelChunkSizeInKByte, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + unsigned int tdlut_bytes_per_group[], + double HostVMInefficiencyFactor, + double HostVMInefficiencyFactorPrefetch, + unsigned int HostVMMinPageSize, + double ReturnBW, + unsigned int MetaChunkSize, + + // output + double *Tarb, + double *Tarb_prefetch) +{ + double extra_bytes = 0; + double extra_bytes_prefetch = 0; + double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_page_table_levels); + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024); + + if (display_cfg->plane_descriptors[k].surface.dcc.enable) + extra_bytes = extra_bytes + (MetaChunkSize * 1024); + + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + extra_bytes = extra_bytes + tdlut_bytes_per_group[k]; + } + + extra_bytes_prefetch = extra_bytes; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (display_cfg->gpuvm_enable == true) { + extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; + extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch; + } + } + *Tarb = extra_bytes / ReturnBW; + *Tarb_prefetch = extra_bytes_prefetch / ReturnBW; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); + dml2_printf("DML::%s: 
MetaChunkSize = %d\n", __func__, MetaChunkSize); + dml2_printf("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); + dml2_printf("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch); +#endif +} + +static double CalculateTWait( + long reserved_vblank_time_ns, + double UrgentLatency, + double Ttrip) +{ + double TWait; + double t_urg_trip = math_max2(UrgentLatency, Ttrip); + TWait = reserved_vblank_time_ns/1000.0 + t_urg_trip; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: reserved_vblank_time_ns = %d\n", __func__, reserved_vblank_time_ns); + dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + dml2_printf("DML::%s: Ttrip = %f\n", __func__, Ttrip); + dml2_printf("DML::%s: TWait = %f\n", __func__, TWait); +#endif + return TWait; +} + + +static void CalculateVUpdateAndDynamicMetadataParameters( + unsigned int MaxInterDCNTileRepeaters, + double Dppclk, + double Dispclk, + double DCFClkDeepSleep, + double PixelClock, + unsigned int HTotal, + unsigned int VBlank, + unsigned int DynamicMetadataTransmittedBytes, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + + // Output + double *TSetup, + double *Tdmbf, + double *Tdmec, + double *Tdmsks, + unsigned int *VUpdateOffsetPix, + unsigned int *VUpdateWidthPix, + unsigned int *VReadyOffsetPix) +{ + double TotalRepeaterDelayTime; + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); + *VUpdateWidthPix = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0)); + *VReadyOffsetPix = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0)); + *VUpdateOffsetPix = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0)); + *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; + *Tdmec = HTotal / PixelClock; + + if (DynamicMetadataLinesBeforeActiveRequired == 0) { + *Tdmsks = VBlank * HTotal / PixelClock / 2.0; + } else { + *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; + } + if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { + *Tdmsks = *Tdmsks / 2; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); + dml2_printf("DML::%s: VBlank = %u\n", __func__, VBlank); + dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); + dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock); + dml2_printf("DML::%s: Dppclk = %f\n", __func__, Dppclk); + dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); + dml2_printf("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); + dml2_printf("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); + + dml2_printf("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); + dml2_printf("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); + dml2_printf("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); + + dml2_printf("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); +#endif +} + +static double get_urgent_bandwidth_required( + struct dml2_core_shared_get_urgent_bandwidth_required_locals *l, + const struct dml2_display_cfg *display_cfg, + enum dml2_core_internal_soc_state_type state_type, + enum dml2_core_internal_bw_type 
bw_type, + bool inc_flip_bw, // including flip bw + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double dcc_dram_bw_pref_overhead_factor_p0[], + double dcc_dram_bw_pref_overhead_factor_p1[], + double mall_prefetch_sdp_overhead_factor[], + double mall_prefetch_dram_overhead_factor[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double dpte_row_bw[], + double meta_row_bw[], + double prefetch_cursor_bw[], + double prefetch_vmrow_bw[], + double flip_bw[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[]) +{ + memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals)); + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0; + l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor; + l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor; + l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor; + l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? 
dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor; + + l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0; + l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1; + l->adj_factor_cur = UrgentBurstFactorCursor[k]; + l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0; + l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1; + l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k]; + + bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]); + bool exclude_this_plane = 0; + + // Exclude phantom pipe in bw calculation for non svp prefetch state + if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom) + exclude_this_plane = 1; + + if (display_cfg->plane_descriptors[k].immediate_flip == false || !inc_flip_bw) + l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]); + else + l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k]; + + + if (!exclude_this_plane) { + l->required_bandwidth_mbps = l->required_bandwidth_mbps + + math_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur, + l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); + dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); + dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); + dml2_printf("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); + + dml2_printf("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); + dml2_printf("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); + dml2_printf("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre); + + dml2_printf("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]); + dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); + dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); + dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); + + dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); + dml2_printf("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]); + dml2_printf("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]); + dml2_printf("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]); + dml2_printf("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]); + dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); + dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, 
dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane); + dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); +#endif + } + + return l->required_bandwidth_mbps; +} + +static void CalculateExtraLatency( + const struct dml2_display_cfg *display_cfg, + unsigned int ROBBufferSizeInKByte, + unsigned int RoundTripPingLatencyCycles, + unsigned int ReorderingBytes, + double DCFCLK, + double FabricClock, + unsigned int PixelChunkSizeInKByte, + double ReturnBW, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + unsigned int tdlut_bytes_per_group[], + double HostVMInefficiencyFactor, + double HostVMInefficiencyFactorPrefetch, + unsigned int HostVMMinPageSize, + enum dml2_qos_param_type qos_type, + bool max_oustanding_when_urgent_expected, + unsigned int max_outstanding_requests, + unsigned int request_size_bytes_luma[], + unsigned int request_size_bytes_chroma[], + unsigned int MetaChunkSize, + unsigned int dchub_arb_to_ret_delay, + double Ttrip, + unsigned int hostvm_mode, + + // output + double *ExtraLatency, // Tex + double *ExtraLatency_sr, // Tex_sr + double *ExtraLatencyPrefetch) + +{ + double Tarb; + double Tarb_prefetch; + double Tex_trips; + unsigned int max_request_size_bytes = 0; + + CalculateTarb( + display_cfg, + PixelChunkSizeInKByte, + NumberOfActiveSurfaces, + NumberOfDPP, + dpte_group_bytes, + tdlut_bytes_per_group, + HostVMInefficiencyFactor, + HostVMInefficiencyFactorPrefetch, + HostVMMinPageSize, + ReturnBW, + MetaChunkSize, + // output + &Tarb, + &Tarb_prefetch); + + Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? 
(2.0 * Ttrip) : 0.0; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (request_size_bytes_luma[k] > max_request_size_bytes) + max_request_size_bytes = request_size_bytes_luma[k]; + if (request_size_bytes_chroma[k] > max_request_size_bytes) + max_request_size_bytes = request_size_bytes_chroma[k]; + } + + if (qos_type == dml2_qos_param_type_dcn4) { + *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; + *ExtraLatency = *ExtraLatency_sr; + if (max_oustanding_when_urgent_expected) + *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW; + } else { + *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW; + *ExtraLatency = *ExtraLatency_sr; + } + *ExtraLatency = *ExtraLatency + Tex_trips; + *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch; + *ExtraLatency = *ExtraLatency + Tarb; + *ExtraLatency_sr = *ExtraLatency_sr + Tarb; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); + dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected); + dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); + dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); + dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); + dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); + dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); + dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); + dml2_printf("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); +#endif +} + +static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p) +{ + struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals; + bool dcc_mrq_enable; + + unsigned int vm_bytes; + unsigned int extra_tdpe_bytes; + unsigned int tdlut_row_bytes; + unsigned int Lo; + + s->NoTimeToPrefetch = false; + s->DPPCycles = 0; + s->DISPCLKCycles = 0; + s->DSTTotalPixelsAfterScaler = 0.0; + s->LineTime = 0.0; + s->dst_y_prefetch_equ = 0.0; + s->prefetch_bw_oto = 0.0; + s->Tvm_oto = 0.0; + s->Tr0_oto = 0.0; + s->Tvm_oto_lines = 0.0; + s->Tr0_oto_lines = 0.0; + s->dst_y_prefetch_oto = 0.0; + s->TimeForFetchingVM = 0.0; + s->TimeForFetchingRowInVBlank = 0.0; + s->LinesToRequestPrefetchPixelData = 0.0; + s->HostVMDynamicLevelsTrips = 0; + s->trip_to_mem = 0.0; + *p->Tvm_trips = 0.0; + *p->Tr0_trips = 0.0; + s->Tvm_trips_rounded = 0.0; + s->Tr0_trips_rounded = 0.0; + s->max_Tsw = 0.0; + s->Lsw_oto = 0.0; + s->Tpre_rounded = 0.0; + s->prefetch_bw_equ = 0.0; + s->Tvm_equ = 0.0; + s->Tr0_equ = 0.0; + s->Tdmbf = 0.0; + s->Tdmec = 0.0; + s->Tdmsks = 0.0; + s->prefetch_sw_bytes = 0.0; + s->prefetch_bw_pr = 0.0; + s->bytes_pp = 0.0; + s->dep_bytes = 0.0; + s->min_Lsw_oto = 0.0; + s->Tsw_est1 = 0.0; + s->Tsw_est3 = 0.0; + s->cursor_prefetch_bytes = 0; + *p->prefetch_cursor_bw = 0; + + dcc_mrq_enable = (p->dcc_enable && p->mrq_present); + + s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip) + + if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) { + s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_page_table_levels; + } else { + 
s->HostVMDynamicLevelsTrips = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); + dml2_printf("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); + dml2_printf("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); + dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); + dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); + dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup); + dml2_printf("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup); + dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); + dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); + dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + dml2_printf("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); + dml2_printf("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); +#endif + CalculateVUpdateAndDynamicMetadataParameters( + p->MaxInterDCNTileRepeaters, + p->myPipe->Dppclk, + p->myPipe->Dispclk, + p->myPipe->DCFClkDeepSleep, + p->myPipe->PixelClock, + p->myPipe->HTotal, + p->myPipe->VBlank, + p->DynamicMetadataTransmittedBytes, + p->DynamicMetadataLinesBeforeActiveRequired, + p->myPipe->InterlaceEnable, + p->myPipe->ProgressiveToInterlaceUnitInOPP, + p->TSetup, + + // Output + &s->Tdmbf, + &s->Tdmec, + &s->Tdmsks, + p->VUpdateOffsetPix, + p->VUpdateWidthPix, + p->VReadyOffsetPix); + + s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; + s->trip_to_mem = p->Ttrip; +#ifdef DML_TVM_UPDATE_EN + *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg); + if (dcc_mrq_enable) + *p->Tvm_trips_flip = *p->Tvm_trips; + else + *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; +#else + *p->Tvm_trips = p->ExtraLatencyPrefetch + s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)); + *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; +#endif + + *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); + *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2); + +#ifdef DML_TVM_UPDATE_EN + if (p->DynamicMetadataVMEnabled == true) { + *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; + *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; + } else { + *p->Tdmdl_vm = 0; + *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex + } +#else + if (p->DynamicMetadataVMEnabled == true) { + *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; + *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; + } else { + *p->Tdmdl_vm = 0; + *p->Tdmdl = p->TWait + p->ExtraLatencyPrefetch; // Tex + } +#endif + + if (p->DynamicMetadataEnable == true) { + if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { + *p->NotEnoughTimeForDynamicMetadata = true; + dml2_printf("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); + dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete 
transmission at dio\n", __func__, s->Tdmsks); + dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + } else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + } else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + + if (p->myPipe->ScalerEnabled) + s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL); + else + s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly); + + s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor); + + s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal; + + if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0) + return true; + + *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay); + *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH + + ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) + + ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); + dml2_printf("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); + dml2_printf("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); + dml2_printf("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); + dml2_printf("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); + dml2_printf("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); + dml2_printf("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); + dml2_printf("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); + dml2_printf("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); + dml2_printf("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); + + dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); + dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); + dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); +#endif + + if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) + *p->DSTYAfterScaler = 1; + else + *p->DSTYAfterScaler = 0; + + s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler; + *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1)); + *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal))); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); + dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); +#endif + + s->NoTimeToPrefetch = false; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + dml2_printf("DML::%s: GPUVMPageTableLevels = 
%u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + dml2_printf("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips); +#endif + if (p->display_cfg->gpuvm_enable) { + s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; + } else { +#ifdef DML_TVM_UPDATE_EN + if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut) + s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0); + else + s->Tvm_trips_rounded = s->LineTime / 4.0; +#else + s->Tvm_trips_rounded = s->LineTime / 4.0; +#endif + *p->Tvm_trips_flip_rounded = s->LineTime / 4.0; + } + + s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0); + *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0); + + if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) { + s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; + } else { + s->Tr0_trips_rounded = s->LineTime / 4.0; + *p->Tr0_trips_flip_rounded = s->LineTime / 4.0; + } + s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0); + *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0); + + if (p->display_cfg->gpuvm_enable == true) { + if (p->display_cfg->gpuvm_max_page_table_levels >= 3) { + *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1)); + } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) { + *p->Tno_bw = p->ExtraLatencyPrefetch; + } else { + *p->Tno_bw = 0; + } + } else { + *p->Tno_bw = 0; + } + +#ifdef DML_TVM_UPDATE_EN + if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3) + *p->Tno_bw_flip = *p->Tno_bw; + else + *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip +#else + *p->Tno_bw_flip = 0; + if (p->display_cfg->gpuvm_enable == true) + *p->Tno_bw_flip = *p->Tno_bw; +#endif + + if (dml_is_420(p->myPipe->SourcePixelFormat)) { + s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0; + } else { + s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC; + } + + s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (double)p->myPipe->DPPPerSurface; + if (p->myPipe->VRatio < 1.0) + s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr; + s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); + + s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; +#ifdef DML_TDLUT_ROW_BYTES_FIX_EN + s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; + s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; +#endif + s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); + + s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; + s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); + s->min_Lsw_oto = 
math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); + + vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; + extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); + + if (p->setup_for_tdlut) + vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0); + +#ifdef DML_TDLUT_ROW_BYTES_FIX_EN + tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0); +#else + tdlut_row_bytes = p->tdlut_pte_bytes_per_frame; +#endif +#ifdef DML_REG_LIMIT_CLAMP_EN + s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto, + p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, + (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); +#endif + s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; + + if (p->display_cfg->gpuvm_enable == true) { + s->Tvm_oto = math_max3( + *p->Tvm_trips, + *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto, + s->LineTime / 4.0); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); + dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); + dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); +#endif + } else { +#ifdef DML_TVM_UPDATE_EN + s->Tvm_oto = s->Tvm_trips_rounded; +#else + s->Tvm_oto = s->LineTime / 4.0; +#endif + } + + if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { + s->Tr0_oto = math_max3( + *p->Tr0_trips, + (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, + s->LineTime / 4.0); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); + dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); + dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); +#endif + } else + s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 4.0; + + s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0; + s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0; + s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; + + //To (time for delay after scaler) in line time + Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); + + //Tpre_equ in line time +#ifdef DML_TVM_UPDATE_EN + if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) + s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; + else + s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; +#else + s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(s->TWait_p + p->TCalc, *p->Tdmdl - p->Ttrip)) / s->LineTime - Lo; +#endif + s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); + dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + 
dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); + dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); + dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); + dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); + dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes); + dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); + dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); + dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); + dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); + dml2_printf("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); + dml2_printf("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); + dml2_printf("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); + dml2_printf("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); + dml2_printf("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); + dml2_printf("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); + dml2_printf("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); + dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + dml2_printf("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); + dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); +#endif + double Tpre = s->dst_y_prefetch_equ * s->LineTime; + s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; + s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; + + dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime); + dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup); + dml2_printf("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); + dml2_printf("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); + dml2_printf("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); + dml2_printf("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); + dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output 
buffer\n", __func__, s->Tdmbf);
+ dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
+ dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
+ dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait);
+ dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
+ dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
+ dml2_printf("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch);
+ dml2_printf("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
+ dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
+ dml2_printf("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p);
+ dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip);
+ dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
+ dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
+
+ s->dep_bytes = math_max2(vm_bytes * p->HostVMInefficiencyFactor, p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
+
+ dml2_printf("DML::%s: dep_bytes: %f\n", __func__, s->dep_bytes);
+ dml2_printf("DML::%s: prefetch_sw_bytes: %f\n", __func__, s->prefetch_sw_bytes);
+ dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor);
+ dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes);
+
+ if (s->prefetch_sw_bytes < s->dep_bytes) {
+ s->prefetch_sw_bytes = 2 * s->dep_bytes;
+ dml2_printf("DML::%s: bump prefetch_sw_bytes to %f\n", __func__, s->prefetch_sw_bytes);
+ }
+
+ *p->dst_y_per_vm_vblank = 0;
+ *p->dst_y_per_row_vblank = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixelDataBWLuma = 0;
+
+ // Derive bandwidth by finding how much data to move within the time constraint
+ // Tpre_rounded is Tpre rounded to a 2-bit fraction
+ // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time
+ // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time
+ // So the calculated prefetch bw can be higher since the total time available for prefetch is less
+ if (s->dst_y_prefetch_equ > 1) {
+ s->prefetch_bw1 = 0.;
+ s->prefetch_bw2 = 0.;
+ s->prefetch_bw3 = 0.;
+ s->prefetch_bw4 = 0.;
+
+ // prefetch_bw1: VM + 2*R0 + SW
+ if (s->Tpre_rounded - *p->Tno_bw > 0) {
+ s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor
+ + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)
+ + s->prefetch_sw_bytes)
+ / (s->Tpre_rounded - *p->Tno_bw);
+ s->Tsw_est1 = s->prefetch_sw_bytes / s->prefetch_bw1;
+ } else
+ s->prefetch_bw1 = 0;
+
+ dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1);
+ if ((p->VStartup == p->MaxVStartup) && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) {
+ s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) /
+ (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", 
__func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)));
+ dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, s->Tpre_rounded);
+ dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_oto * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw);
+ dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
+ dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+ dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
+ dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw));
+ dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
+#endif
+ }
+
+ // prefetch_bw2: VM + SW
+ if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0)
+ s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
+ (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded);
+ else
+ s->prefetch_bw2 = 0;
+
+ // prefetch_bw3: 2*R0 + SW
+ if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) /
+ (s->Tpre_rounded - s->Tvm_trips_rounded);
+ s->Tsw_est3 = s->prefetch_sw_bytes / s->prefetch_bw3;
+ } else
+ s->prefetch_bw3 = 0;
+
+ dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
+ if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && ((s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
+ dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
+ }
+
+ // prefetch_bw4: SW
+ if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
+ s->prefetch_bw4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
+ else
+ s->prefetch_bw4 = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
+ dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre));
+ dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
+ dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
+ dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
+ dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
+ dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
+ dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
+ dml2_printf("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3);
+ dml2_printf("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4);
+#endif
+ {
+ bool Case1OK = false;
+ bool Case2OK = false;
+ bool Case3OK = false;
+
+ // get "equalized" bw among all stages (vm, r0, sw), i.e. all 3 stages are just above the latency-based requirement
+ // so it does not disproportionately favor a particular stage; the next cases are r0 more aggressive, then vm more aggressive, and the worst case is all stages aggressive
+ // vs the latency-based number
+
+ // prefetch_bw1: VM + 2*R0 + SW
+ // so prefetch_bw1 will have enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the worst-case latency-based time to fetch the data)
+ // this is to make sure the equ bw won't be more aggressive than the latency-based requirement.
+ // check vm time >= vm_trips
+ // check r0 time >= r0_trips
+ if (s->prefetch_bw1 > 0) {
+ if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1 >= s->Tvm_trips_rounded &&
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw1 >= s->Tr0_trips_rounded) {
+ Case1OK = true;
+ }
+ }
+
+ // prefetch_bw2: VM + SW
+ // prefetch_bw2 will be enough bw to transfer VM and SW data within (Tpre_rounded - Tr0_trips_rounded - Tno_bw)
+ // check vm time >= vm_trips
+ // check r0 time < r0_trips
+ if (s->prefetch_bw2 > 0) {
+ if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2 >= s->Tvm_trips_rounded &&
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw2 < s->Tr0_trips_rounded) {
+ Case2OK = true;
+ }
+ }
+
+ // prefetch_bw3: VM + 2*R0
+ // check vm time < vm_trips
+ // check r0 time >= r0_trips
+ if (s->prefetch_bw3 > 0) {
+ if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3 < s->Tvm_trips_rounded &&
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw3 >= s->Tr0_trips_rounded) {
+ Case3OK = true;
+ }
+ }
+
+ if (Case1OK) {
+ s->prefetch_bw_equ = s->prefetch_bw1;
+ } else if (Case2OK) {
+ s->prefetch_bw_equ = s->prefetch_bw2;
+ } else if (Case3OK) {
+ s->prefetch_bw_equ = s->prefetch_bw3;
+ } else {
+ s->prefetch_bw_equ = s->prefetch_bw4;
+ }
+
+#ifdef DML_REG_LIMIT_CLAMP_EN
+ s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ,
+ p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
+#endif
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK);
+ dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK);
+ dml2_printf("DML::%s: Case3OK: %u\n", __func__, Case3OK);
+ dml2_printf("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
+#endif
+
+ if (s->prefetch_bw_equ > 0) {
+ if (p->display_cfg->gpuvm_enable == true) {
+ s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4);
+ } else {
+ s->Tvm_equ = s->LineTime / 4;
+ }
+
+ if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) {
+ s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes
+ *p->Tr0_trips,
+ s->LineTime / 4);
+ } else {
+ s->Tr0_equ = s->LineTime / 4;
+ }
+ } else {
+ s->Tvm_equ = 0;
+ s->Tr0_equ = 0;
+ dml2_printf("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ);
+ dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ);
+#endif
+ // Use the more stressful prefetch schedule
+ if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
+ *p->dst_y_prefetch = s->dst_y_prefetch_oto;
+ s->TimeForFetchingVM = s->Tvm_oto;
+ 
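+ /* In this branch the oto schedule yields the smaller dst_y_prefetch, i.e.
+  * less time for the prefetch, so it is the more stressful of the two
+  * schedules. The fetch times below are quantized to quarter-line
+  * granularity; for example (illustrative numbers only), with
+  * LineTime = 10us and Tvm_oto = 3.2us,
+  * dst_y_per_vm_vblank = math_ceil2(4.0 * 3.2 / 10.0, 1.0) / 4.0 = 0.5 lines.
+  */
+ 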
s->TimeForFetchingRowInVBlank = s->Tr0_oto; + + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__); +#endif + + } else { + *p->dst_y_prefetch = s->dst_y_prefetch_equ; + s->TimeForFetchingVM = s->Tvm_equ; + s->TimeForFetchingRowInVBlank = s->Tr0_equ; + + if (p->VStartup == p->MaxVStartup) { + *p->dst_y_per_vm_vblank = math_floor2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_floor2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + } else { + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); +#endif + } + + // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) + s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw + + s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line); + *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); + dml2_printf("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); + dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); + dml2_printf("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); + dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); + dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + + dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); + dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); + dml2_printf("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); + dml2_printf("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); +#endif + dml2_assert(*p->dst_y_prefetch < 64); + + unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime); + if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) { + *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + dml2_printf("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); + dml2_printf("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); +#endif + if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) { + *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, + (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) 
/ 2.0)); + } else { + s->NoTimeToPrefetch = true; + dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + *p->VRatioPrefetchY = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + dml2_printf("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); +#endif + } + + *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + dml2_printf("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); + dml2_printf("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); +#endif + if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) { + *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); + } else { + s->NoTimeToPrefetch = true; + dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + *p->VRatioPrefetchC = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + dml2_printf("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); +#endif + } + + *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime; + *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); +#endif + } else { + s->NoTimeToPrefetch = true; + dml2_printf("DML::%s: MyErr set, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + dml2_printf("DML::%s: MyErr set, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + *p->RequiredPrefetchPixelDataBWChroma = 0; + } + + dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); + dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); + dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); + dml2_printf("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the 
scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime);
+ dml2_printf("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime);
+ dml2_printf("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n");
+ dml2_printf("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
+ dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
+
+ } else {
+ dml2_printf("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
+ s->NoTimeToPrefetch = true;
+ s->TimeForFetchingVM = 0;
+ s->TimeForFetchingRowInVBlank = 0;
+ *p->dst_y_per_vm_vblank = 0;
+ *p->dst_y_per_row_vblank = 0;
+ s->LinesToRequestPrefetchPixelData = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixelDataBWLuma = 0;
+ *p->RequiredPrefetchPixelDataBWChroma = 0;
+ }
+
+ {
+ double prefetch_vm_bw;
+ double prefetch_row_bw;
+
+ if (vm_bytes == 0) {
+ prefetch_vm_bw = 0;
+ } else if (*p->dst_y_per_vm_vblank > 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
+ dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
+ dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+#endif
+ prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime);
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+ } else {
+ prefetch_vm_bw = 0;
+ s->NoTimeToPrefetch = true;
+ dml2_printf("DML::%s: MyErr set. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank);
+ }
+
+ if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) {
+ prefetch_row_bw = 0;
+ } else if (*p->dst_y_per_row_vblank > 0) {
+ prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
+ dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
+ dml2_printf("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+ } else {
+ prefetch_row_bw = 0;
+ s->NoTimeToPrefetch = true;
+ dml2_printf("DML::%s: MyErr set. 
dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + } + + *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); + } + + if (s->NoTimeToPrefetch) { + s->TimeForFetchingVM = 0; + s->TimeForFetchingRowInVBlank = 0; + *p->dst_y_per_vm_vblank = 0; + *p->dst_y_per_row_vblank = 0; + *p->dst_y_prefetch = 0; + s->LinesToRequestPrefetchPixelData = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + *p->RequiredPrefetchPixelDataBWChroma = 0; + } + + dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); + dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + return s->NoTimeToPrefetch; +} + +static void calculate_peak_bandwidth_required( + struct dml2_core_internal_scratch *s, + + // output + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + + // input + const struct dml2_display_cfg *display_cfg, + bool inc_flip_bw, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double dcc_dram_bw_pref_overhead_factor_p0[], + double dcc_dram_bw_pref_overhead_factor_p1[], + double mall_prefetch_sdp_overhead_factor[], + double mall_prefetch_dram_overhead_factor[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double dpte_row_bw[], + double meta_row_bw[], + double prefetch_cursor_bw[], + double prefetch_vmrow_bw[], + double flip_bw[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[]) +{ + unsigned int n; + unsigned int m; + + struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals; + + memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: inc_flip_bw = %d\n", __func__, inc_flip_bw); + dml2_printf("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); +#endif + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + l->unity_array[k] = 1.0; + l->zero_array[k] = 0.0; + } + + for (m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (n = 0; n < dml2_core_internal_bw_max; n++) { + urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + display_cfg, + m, + n, + 0, //inc_flip_bw, + NumberOfActiveSurfaces, + NumberOfDPP, + dcc_dram_bw_nom_overhead_factor_p0, + dcc_dram_bw_nom_overhead_factor_p1, + dcc_dram_bw_pref_overhead_factor_p0, + dcc_dram_bw_pref_overhead_factor_p1, + mall_prefetch_sdp_overhead_factor, + mall_prefetch_dram_overhead_factor, + ReadBandwidthLuma, + ReadBandwidthChroma, + l->zero_array, //PrefetchBandwidthLuma, + l->zero_array, //PrefetchBandwidthChroma, + cursor_bw, + dpte_row_bw, + meta_row_bw, + l->zero_array, //prefetch_cursor_bw, + l->zero_array, //prefetch_vmrow_bw, + 
l->zero_array, //flip_bw, + UrgentBurstFactorLuma, + UrgentBurstFactorChroma, + UrgentBurstFactorCursor, + UrgentBurstFactorLumaPre, + UrgentBurstFactorChromaPre, + UrgentBurstFactorCursorPre); + + + urg_bandwidth_required[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + display_cfg, + m, + n, + inc_flip_bw, + NumberOfActiveSurfaces, + NumberOfDPP, + dcc_dram_bw_nom_overhead_factor_p0, + dcc_dram_bw_nom_overhead_factor_p1, + dcc_dram_bw_pref_overhead_factor_p0, + dcc_dram_bw_pref_overhead_factor_p1, + mall_prefetch_sdp_overhead_factor, + mall_prefetch_dram_overhead_factor, + ReadBandwidthLuma, + ReadBandwidthChroma, + PrefetchBandwidthLuma, + PrefetchBandwidthChroma, + cursor_bw, + dpte_row_bw, + meta_row_bw, + prefetch_cursor_bw, + prefetch_vmrow_bw, + flip_bw, + UrgentBurstFactorLuma, + UrgentBurstFactorChroma, + UrgentBurstFactorCursor, + UrgentBurstFactorLumaPre, + UrgentBurstFactorChromaPre, + UrgentBurstFactorCursorPre); + + non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + display_cfg, + m, + n, + inc_flip_bw, + NumberOfActiveSurfaces, + NumberOfDPP, + dcc_dram_bw_nom_overhead_factor_p0, + dcc_dram_bw_nom_overhead_factor_p1, + dcc_dram_bw_pref_overhead_factor_p0, + dcc_dram_bw_pref_overhead_factor_p1, + mall_prefetch_sdp_overhead_factor, + mall_prefetch_dram_overhead_factor, + ReadBandwidthLuma, + ReadBandwidthChroma, + PrefetchBandwidthLuma, + PrefetchBandwidthChroma, + cursor_bw, + dpte_row_bw, + meta_row_bw, + prefetch_cursor_bw, + prefetch_vmrow_bw, + flip_bw, + l->unity_array, + l->unity_array, + l->unity_array, + l->unity_array, + l->unity_array, + l->unity_array); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_vactive_bandwidth_required[m][n]); + dml2_printf("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_required[m][n]); + dml2_printf("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (inc_flip_bw ? 
"_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), non_urg_bandwidth_required[m][n]); +#endif + dml2_assert(urg_bandwidth_required[m][n] >= non_urg_bandwidth_required[m][n]); + } + } +} + +static void check_urgent_bandwidth_support( + double *frac_urg_bandwidth_nom, + double *frac_urg_bandwidth_mall, + bool *vactive_bandwidth_support_ok, // vactive ok + bool *bandwidth_support_ok, // max of vm, prefetch, vactive all ok + + unsigned int mall_allocated_for_dcn_mbytes, + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + double frac_urg_bandwidth_mall_sdp; + double frac_urg_bandwidth_mall_dram; + if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] > 0) + frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + else + frac_urg_bandwidth_mall_sdp = 0.0; + if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] > 0) + frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + else + frac_urg_bandwidth_mall_dram = 0.0; + + *bandwidth_support_ok = 1; + *vactive_bandwidth_support_ok = 1; + + // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth + // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram + // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL + // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch + + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + + if (mall_allocated_for_dcn_mbytes > 0) { + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= 
urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + } + + *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram); + *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram); + + *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0); + + if (mall_allocated_for_dcn_mbytes > 0) + *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0); + + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + if (mall_allocated_for_dcn_mbytes > 0) { + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp); + dml2_printf("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); + dml2_printf("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); + + dml2_printf("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); + dml2_printf("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); + dml2_printf("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); + dml2_printf("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); + + for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { + dml2_printf("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), + urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]); + } + } +#endif + +} + +static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state, + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + double flip_bw_available_mbps; + double flip_bw_available_sdp_mbps; + double flip_bw_available_dram_mbps; + + flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]; + flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]; + flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? 
flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + dml2_printf("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); + dml2_printf("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); + dml2_printf("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); + dml2_printf("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); + dml2_printf("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); + dml2_printf("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); + dml2_printf("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); +#endif + + return flip_bw_available_mbps; +} + +static void calculate_immediate_flip_bandwidth_support( + // Output + double *frac_urg_bandwidth_flip, + bool *flip_bandwidth_support_ok, + + // Input + enum dml2_core_internal_soc_state_type eval_state, + double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]; + double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]; + + *flip_bandwidth_support_ok = true; + for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram + *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n)); + dml2_printf("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); + dml2_printf("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); + dml2_printf("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); + dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); +#endif + dml2_assert(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]); + } + + *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? 
frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram; + *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + dml2_printf("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); + dml2_printf("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); + dml2_printf("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); + dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); + + for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { + dml2_printf("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), + urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]); + } + } +#endif +} + +static void CalculateFlipSchedule( + struct dml2_core_internal_scratch *s, + bool iflip_enable, + bool use_lb_flip_bw, + double HostVMInefficiencyFactor, + double Tvm_trips_flip, + double Tr0_trips_flip, + double Tvm_trips_flip_rounded, + double Tr0_trips_flip_rounded, + bool GPUVMEnable, + double vm_bytes, // vm_bytes + double DPTEBytesPerRow, // dpte_row_bytes + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum dml2_source_format_class SourcePixelFormat, + double LineTime, + double VRatio, + double VRatioChroma, + double Tno_bw_flip, + unsigned int dpte_row_height, + unsigned int dpte_row_height_chroma, + bool use_one_row_for_frame_flip, + unsigned int max_flip_time_us, + unsigned int per_pipe_flip_bytes, + unsigned int meta_row_bytes, + unsigned int meta_row_height, + unsigned int meta_row_height_chroma, + bool dcc_mrq_enable, + + // Output + double *dst_y_per_vm_flip, + double *dst_y_per_row_flip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe) +{ + struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; + + l->dual_plane = dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; + l->dpte_row_bytes = DPTEBytesPerRow; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); + dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); + dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); + dml2_printf("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); + dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime); + dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); + dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); + dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); + dml2_printf("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded); + dml2_printf("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded); + dml2_printf("DML::%s: vm_bytes = %f\n", __func__, vm_bytes); + dml2_printf("DML::%s: 
DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); + dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes); + dml2_printf("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes); + dml2_printf("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height); + dml2_printf("DML::%s: meta_row_height = %d\n", __func__, meta_row_height); + dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); +#endif + + if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) { + if (l->dual_plane) { + if (dcc_mrq_enable & GPUVMEnable) { + l->min_row_height = math_min2(dpte_row_height, meta_row_height); + l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma); + } else if (GPUVMEnable) { + l->min_row_height = dpte_row_height; + l->min_row_height_chroma = dpte_row_height_chroma; + } else { + l->min_row_height = meta_row_height; + l->min_row_height_chroma = meta_row_height_chroma; + } + l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma); + } else { + if (dcc_mrq_enable & GPUVMEnable) + l->min_row_height = math_min2(dpte_row_height, meta_row_height); + else if (GPUVMEnable) + l->min_row_height = dpte_row_height; + else + l->min_row_height = meta_row_height; + + l->min_row_time = l->min_row_height * LineTime / VRatio; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: min_row_time = %f\n", __func__, l->min_row_time); +#endif + dml2_assert(l->min_row_time > 0); + + if (use_lb_flip_bw) { + // For mode check, calculation the flip bw requirement with worst case flip time + l->max_flip_time = math_min2(l->min_row_time, math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); + + //The lower bound on flip bandwidth + // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required + l->lb_flip_bw = 0; + + if (iflip_enable) { + l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor; + l->num_rows = 2; + l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes); + l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes; + l->lb_flip_bw = math_max3( + l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip), + l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded), + l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); + dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); + dml2_printf("DML::%s: total row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_row_bytes); + dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); + dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); + dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); + dml2_printf("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); + + if (l->lb_flip_bw > 0) { + dml2_printf("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); + 
dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); + dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); + dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + } +#endif + l->lb_flip_bw = math_max3(l->lb_flip_bw, + l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip, + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); + dml2_printf("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); +#endif + } + + *final_flip_bw = l->lb_flip_bw; + + *dst_y_per_vm_flip = 1; // not used + *dst_y_per_row_flip = 1; // not used + *ImmediateFlipSupportedForPipe = true; + } else { + if (iflip_enable) { + l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) + double portion = (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); + dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + dml2_printf("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); + dml2_printf("DML::%s: portion of flip bw = %f\n", __func__, portion); +#endif + if (l->ImmediateFlipBW == 0) { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + } else { + l->Tvm_flip = math_max3(Tvm_trips_flip, + Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, + LineTime / 4.0); + + l->Tr0_flip = math_max3(Tr0_trips_flip, + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, + LineTime / 4.0); + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor); + dml2_printf("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); + + dml2_printf("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); + dml2_printf("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip); +#endif + *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0; + *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0; + + *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime), + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime)); + + if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) { + *ImmediateFlipSupportedForPipe = false; + } else { + *ImmediateFlipSupportedForPipe = iflip_enable; + } + } else { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + *dst_y_per_vm_flip = 0; + *dst_y_per_row_flip = 0; + *final_flip_bw = 0; + 
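+ /* Immediate flip was not requested for this pipe, so the flip schedule is
+  * left zeroed and the reported support simply mirrors the request (false here).
+  */
+ 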
*ImmediateFlipSupportedForPipe = iflip_enable; + } + } + } else { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + *dst_y_per_vm_flip = 0; + *dst_y_per_row_flip = 0; + *final_flip_bw = 0; + *ImmediateFlipSupportedForPipe = iflip_enable; + } + +#ifdef __DML_VBA_DEBUG__ + if (!use_lb_flip_bw) { + dml2_printf("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); + dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); + dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); + dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + } + dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); + dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); +#endif +} + +static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p) +{ + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; + + enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy; + double reserved_vblank_time_us; + bool FoundCriticalSurface = false; + + s->TotalActiveWriteback = 0; + p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); +#endif + + p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; + p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->g6_temp_read_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); + dml2_printf("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); + dml2_printf("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); + dml2_printf("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); + dml2_printf("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); + dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + dml2_printf("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); + dml2_printf("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); + 
dml2_printf("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); + dml2_printf("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); + dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); + dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); + dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); + dml2_printf("DML::%s: g6_temp_read_watermark_us = %f\n", __func__, p->Watermark->g6_temp_read_watermark_us); +#endif + + s->TotalActiveWriteback = 0; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + s->TotalActiveWriteback = s->TotalActiveWriteback + 1; + } + } + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + } + if (p->USRRetrainingRequired) + p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK; + } + + if (p->USRRetrainingRequired) + p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (p->USRRetrainingRequired) + p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); + dml2_printf("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); + dml2_printf("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); + dml2_printf("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); + dml2_printf("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); +#endif + + s->TotalPixelBW = 0.0; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; + double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + + s->TotalPixelBW = 
s->TotalPixelBW + p->DPPPerSurface[k] + * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz); + } + + *p->global_fclk_change_supported = true; + *p->global_dram_clock_change_supported = true; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; + double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio; + double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio; + double LBBitPerPixel = 57; + + s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1)); + s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, MaxLineBufferLines= %u\n", __func__, k, p->MaxLineBufferLines); + dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); + dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel); + dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio); + dml2_printf("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); +#endif + + s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz); + s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz); + + s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k]; + if (p->UnboundedRequestEnabled) { + s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW; + } + + s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; + s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k])); + s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio; + + s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz; + + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio; + } + + if (p->BytePerPixelDETC[k] > 0) { + s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k]; + 
s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k])); + s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c; + s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz; + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c; + } + s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC); + } else { + s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY; + } + + s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark; + s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark; + s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark; + s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->g6_temp_read_watermark_us; + + if (p->VActiveLatencyHidingMargin) + p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k]; + + if (p->VActiveLatencyHidingUs) + p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding; + + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable) { + s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height * (double)h_total / pixel_clock_mhz) * 4.0); + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) { + s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2; + } + s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark; + + s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark; + + s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin); + s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin); + } + p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 
0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency); + + uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy; + reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000; + + p->FCLKChangeSupport[k] = dml2_fclock_change_unsupported; + if (s->ActiveFCLKChangeLatencyMargin[k] > 0) + p->FCLKChangeSupport[k] = dml2_fclock_change_vactive; + else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency) + p->FCLKChangeSupport[k] = dml2_fclock_change_vblank; + + if (p->FCLKChangeSupport[k] == dml2_fclock_change_unsupported) + *p->global_fclk_change_supported = false; + + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_unsupported; + if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) { + if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank_and_vactive; + else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; + else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank; + } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_drr; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_svp; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_full_frame; + + if (p->DRAMClockChangeSupport[k] == dml2_dram_clock_change_unsupported) + *p->global_dram_clock_change_supported = false; + + s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1)); + s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k])); + s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]); + s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + dml2_printf("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + dml2_printf("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); + dml2_printf("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); + dml2_printf("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, 
s->src_y_pstate_l);
+ dml2_printf("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
+ dml2_printf("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]);
+ dml2_printf("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
+#endif
+ p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
+
+ if (p->BytePerPixelDETC[k] > 0) {
+ s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k]));
+ s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
+ s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k];
+
+ if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format))
+ p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c));
+ else
+ p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c));
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]);
+ dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
+ dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
+ dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
+#endif
+ }
+ }
+
+ *p->g6_temp_read_support = true;
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) &&
+ (s->g6_temp_read_latency_margin[k] < 0)) {
+ *p->g6_temp_read_support = false;
+ }
+ }
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface)
+ || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
+ FoundCriticalSurface = true;
+ *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported);
+ dml2_printf("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported);
+ dml2_printf("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
+ dml2_printf("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
+#endif
+}
+
+static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config)
+{
+ double bw_mbps = 0;
+ bw_mbps = ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
+
+ return bw_mbps;
+}
+
+static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config)
+{
+ double uclk_mhz = 0;
+
+ uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
+
+ return uclk_mhz;
+}
+
+static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
+{
+ unsigned int i;
+ unsigned int index = 0;
+
+ for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
+ dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", 
__func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + + if (i == 0) + index = 0; + else + index = i - 1; + + if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || + per_uclk_dpm_params[i].minimum_uclk_khz == 0) { + break; + } + } +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, index); +#endif + return index; +} + +static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) +{ + unsigned int i; + bool clk_entry_found = 0; + + for (i = 0; i < clk_table->uclk.num_clk_values; i++) { + dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + + if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { + clk_entry_found = 1; + break; + } + } + + dml2_assert(clk_entry_found); +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, i); +#endif + return i; +} + +static unsigned int get_pipe_flip_bytes( + double hostvm_inefficiency_factor, + unsigned int vm_bytes, + unsigned int dpte_row_bytes, + unsigned int meta_row_bytes) +{ + unsigned int flip_bytes = 0; + + flip_bytes += (unsigned int) ((vm_bytes * hostvm_inefficiency_factor) + 2*meta_row_bytes); + flip_bytes += (unsigned int) (2*dpte_row_bytes * hostvm_inefficiency_factor); + + return flip_bytes; +} + +static void calculate_hostvm_inefficiency_factor( + double *HostVMInefficiencyFactor, + double *HostVMInefficiencyFactorPrefetch, + + bool gpuvm_enable, + bool hostvm_enable, + unsigned int remote_iommu_outstanding_translations, + unsigned int max_outstanding_reqs, + double urg_bandwidth_avail_active_pixel_and_vm, + double urg_bandwidth_avail_active_vm_only) +{ + *HostVMInefficiencyFactor = 1; + *HostVMInefficiencyFactorPrefetch = 1; + + if (gpuvm_enable && hostvm_enable) { + *HostVMInefficiencyFactor = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only; + *HostVMInefficiencyFactorPrefetch = *HostVMInefficiencyFactor; + + if ((*HostVMInefficiencyFactorPrefetch < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs)) + *HostVMInefficiencyFactorPrefetch = 4; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm); + dml2_printf("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only); + dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor); + dml2_printf("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch); +#endif + } +} + +static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) +{ + struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; + const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; + const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; + +#if defined(__DML_VBA_DEBUG__) + double old_ReadBandwidthLuma; + double old_ReadBandwidthChroma; +#endif + double outstanding_latency_us = 0; + double min_return_bw_for_latency; + + struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = 
&mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; + struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; + unsigned int k, m, n; + + memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch)); + memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support)); + + mode_lib->ms.num_active_planes = display_cfg->num_planes; + get_stream_output_bpp(s->OutputBpp, display_cfg); + + mode_lib->ms.state_idx = in_out_params->min_clk_index; + mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000); + mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000); + mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000); + mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000; + mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000; + mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dispclk / 1000; + mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000; + mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000; + mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); + mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); + mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000); + mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); + mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); + +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: --- START --- \n", __func__); + dml2_printf("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); + dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + dml2_printf("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index); + dml2_printf("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK); + dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps); + dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); + dml2_printf("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); + dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + dml2_printf("DML::%s: MaxDCFCLK = 
%f\n", __func__, mode_lib->ms.MaxDCFCLK); + dml2_printf("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz); + dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); + dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); + dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); + dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); + dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); + dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + + // dml2_printf_dml_policy(&mode_lib->ms.policy); + // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, mode_lib->ms.num_active_planes); + // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, mode_lib->ms.num_active_planes); + // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, mode_lib->ms.num_active_planes); + // dml2_printf_dml_display_cfg_output(&display_cfg->output, mode_lib->ms.num_active_planes); +#endif + + CalculateMaxDETAndMinCompressedBufferSize( + mode_lib->ip.config_return_buffer_size_in_kbytes, + mode_lib->ip.config_return_buffer_segment_size_in_kbytes, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->ip.max_num_dpp, + display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, + display_cfg->overrides.hw.force_nom_det_size_kbytes.value, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + &mode_lib->ms.MaxTotalDETInKByte, + &mode_lib->ms.NomDETInKByte, + &mode_lib->ms.MinCompressedBufferSizeInKByte); + + PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + mode_lib->ms.support.ScaleRatioAndTapsSupport = true; + // Many core tests are still setting scaling parameters "incorrectly" + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false + && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) { + mode_lib->ms.support.ScaleRatioAndTapsSupport = false; + } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0 + || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1) + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio + || 
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) + && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 || + (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) { + mode_lib->ms.support.ScaleRatioAndTapsSupport = false; + } + } + /*Source Format, Pixel Format and Scan Support Check*/ + mode_lib->ms.support.SourceFormatPixelAndScanSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + mode_lib->ms.support.SourceFormatPixelAndScanSupport = false; + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateBytePerPixelAndBlockSizes( + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].surface.tiling, + display_cfg->plane_descriptors[k].surface.plane0.pitch, + display_cfg->plane_descriptors[k].surface.plane1.pitch, + + /* Output */ + &mode_lib->ms.BytePerPixelY[k], + &mode_lib->ms.BytePerPixelC[k], + &mode_lib->ms.BytePerPixelInDETY[k], + &mode_lib->ms.BytePerPixelInDETC[k], + &mode_lib->ms.Read256BlockHeightY[k], + &mode_lib->ms.Read256BlockHeightC[k], + &mode_lib->ms.Read256BlockWidthY[k], + &mode_lib->ms.Read256BlockWidthC[k], + &mode_lib->ms.MacroTileHeightY[k], + &mode_lib->ms.MacroTileHeightC[k], + &mode_lib->ms.MacroTileWidthY[k], + &mode_lib->ms.MacroTileWidthC[k], + &mode_lib->ms.surf_linear128_l[k], + &mode_lib->ms.surf_linear128_c[k]); + } + + /*Bandwidth Support Check*/ + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; + } else { + mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + 
mode_lib->ms.SurfaceReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + mode_lib->ms.SurfaceReadBandwidthChroma[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + + mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * + display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); + +#ifdef __DML_VBA_DEBUG__ + old_ReadBandwidthLuma = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0; + dml2_printf("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma); + dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma); + dml2_printf("DML::%s: k=%u, ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthLuma[k]); + dml2_printf("DML::%s: k=%u, ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthChroma[k]); +#endif + } + + // Writeback bandwidth + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) { + mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width + / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total + / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0; + } else if 
(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width + / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total + / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0; + } else { + mode_lib->ms.WriteBandwidth[k] = 0.0; + } + } + + /*Writeback Latency support check*/ + mode_lib->ms.support.WritebackLatencySupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && + (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { + mode_lib->ms.support.WritebackLatencySupport = false; + } + } + + /* Writeback Mode Support Check */ + s->TotalNumberOfActiveWriteback = 0; + for (k = 0; k <= (unsigned int)mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true + && (display_cfg->plane_descriptors[k].stream_index == k)) { + s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1; + } + } + + mode_lib->ms.support.EnoughWritebackUnits = 1; + if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) { + mode_lib->ms.support.EnoughWritebackUnits = false; + } + + /* Writeback Scale Ratio and Taps Support Check */ + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > mode_lib->ip.writeback_max_hscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > mode_lib->ip.writeback_max_vscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio < mode_lib->ip.writeback_min_hscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio < mode_lib->ip.writeback_min_vscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > (unsigned 
int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps + || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps % 2) == 1))) { + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) { + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; + } + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateSinglePipeDPPCLKAndSCLThroughput( + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ip.max_dchub_pscl_bw_pix_per_clk, + mode_lib->ip.max_pscl_lb_bw_pix_per_clk, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps, + /* Output */ + &mode_lib->ms.PSCL_FACTOR[k], + &mode_lib->ms.PSCL_FACTOR_CHROMA[k], + &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]); + } + + // Max Viewport Size support + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { + s->MaximumSwathWidthSupportLuma = 15360; + } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video + s->MaximumSwathWidthSupportLuma = 7680 + 16; + } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video + s->MaximumSwathWidthSupportLuma = 4320 + 16; + } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha + s->MaximumSwathWidthSupportLuma = 5120 + 16; + } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp + s->MaximumSwathWidthSupportLuma = 3072 + 16; + } else { + s->MaximumSwathWidthSupportLuma = 6144 + 16; + } + + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { + s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0); + } else { + s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma; + } + mode_lib->ms.MaximumSwathWidthInLineBufferLuma = mode_lib->ip.line_buffer_size_bits * 
math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0)); + if (mode_lib->ms.BytePerPixelC[k] == 0.0) { + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; + } else { + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = mode_lib->ip.line_buffer_size_bits * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0)); + } + mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); + mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); + } + + /* Cursor Support Check */ + mode_lib->ms.support.CursorSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].cursor.cursor_width > 0.0) { + if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) { + mode_lib->ms.support.CursorSupport = false; + } + } + } + + /* Valid Pitch Check */ + mode_lib->ms.support.PitchSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + + // data pitch + unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k]; + + if (mode_lib->ms.surf_linear128_l[k]) + alignment_l = alignment_l / 2; + + mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l); + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k]; + + if (mode_lib->ms.surf_linear128_c[k]) + alignment_c = alignment_c / 2; + mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c); + } else { + mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch; + } + + if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch || + mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) { + mode_lib->ms.support.PitchSupport = false; +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]); + dml2_printf("DML::%s: k=%u PitchY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch); + dml2_printf("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]); + dml2_printf("DML::%s: k=%u PitchC = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch); + dml2_printf("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport); +#endif + } + + // meta pitch + if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) { + 
mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch, + display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]); + + if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch) + mode_lib->ms.support.PitchSupport = false; + + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch, + display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]); + + if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch) + mode_lib->ms.support.PitchSupport = false; + } + } else { + mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0; + mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0; + } + } + + mode_lib->ms.support.ViewportExceedsSurface = false; + if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { + mode_lib->ms.support.ViewportExceedsSurface = true; +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + dml2_printf("DML::%s: k=%u SurfaceWidthY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width); + dml2_printf("DML::%s: k=%u ViewportHeight = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); + dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); +#endif + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || + display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { + mode_lib->ms.support.ViewportExceedsSurface = true; + } + } + } + } + } + + CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte; + CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = 
mode_lib->ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1; + CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.SurfaceReadBandwidthLuma; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.SurfaceReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC; + CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l; + CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c; + CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode; + CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY; + CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC; + CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2]; + CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + // output + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0]; + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1]; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3]; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[4]; + CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5]; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6]; + CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7]; + CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8]; + CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26]; + CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11]; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_c 
= s->full_swath_bytes_c;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1];
+ CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2];
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
+
+ // This call is just to find out if there is enough DET space to support full vp in 1 pipe.
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
+
+ {
+ mode_lib->ms.TotalNumberOfActiveDPP = 0;
+ mode_lib->ms.support.TotalAvailablePipesSupport = true;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateODMMode(
+ mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
+ mode_lib->ms.max_dispclk_freq_mhz,
+ false, // DSCEnable
+ mode_lib->ms.TotalNumberOfActiveDPP,
+ mode_lib->ip.max_num_dpp,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+
+ /* Output */
+ &s->TotalAvailablePipesSupportNoDSC,
+ &s->NumberOfDPPNoDSC,
+ &s->ODMModeNoDSC,
+ &s->RequiredDISPCLKPerSurfaceNoDSC);
+
+ CalculateODMMode(
+ mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
+ mode_lib->ms.max_dispclk_freq_mhz,
+ true, // DSCEnable
+ mode_lib->ms.TotalNumberOfActiveDPP,
+ mode_lib->ip.max_num_dpp,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+
+ /* Output */
+ &s->TotalAvailablePipesSupportDSC,
+ &s->NumberOfDPPDSC,
+ &s->ODMModeDSC,
+ &s->RequiredDISPCLKPerSurfaceDSC);
+
+ /*Number Of DSC Slices*/
+ if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->plane_descriptors[k].stream_index == k) {
+
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices;
+ else {
+ if (s->PixelClockBackEnd[k] > 4800) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned 
int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4));
+ } else if (s->PixelClockBackEnd[k] > 2400) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
+ } else if (s->PixelClockBackEnd[k] > 1200) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
+ } else if (s->PixelClockBackEnd[k] > 340) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
+ }
+ }
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
+ }
+
+ if (s->ODMModeDSC == dml2_odm_mode_combine_2to1)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0);
+ else if (s->ODMModeDSC == dml2_odm_mode_combine_3to1)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 12;
+ else if (s->ODMModeDSC == dml2_odm_mode_combine_4to1)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0);
+
+ CalculateOutputLink(
+ &mode_lib->scratch,
+ ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000),
+ ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000),
+ ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000),
+ mode_lib->soc.phy_downspread_percent,
+ (display_cfg->plane_descriptors[k].stream_index == k),
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ s->PixelClockBackEnd[k],
+ s->OutputBpp[k],
+ mode_lib->ip.maximum_dsc_bits_per_component,
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout,
+ s->ODMModeNoDSC,
+ s->ODMModeDSC,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate,
+
+ /* Output */
+ &mode_lib->ms.RequiresDSC[k],
+ &mode_lib->ms.RequiresFEC[k],
+ &mode_lib->ms.OutputBpp[k],
+ &mode_lib->ms.OutputType[k], // VBA_DELTA, VBA uses a string to represent type and rate, but DML uses enum, don't want to rely on string
+ &mode_lib->ms.OutputRate[k],
+ &mode_lib->ms.RequiredSlots[k]);
+
+ if (mode_lib->ms.RequiresDSC[k] == false) {
+ mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC;
+ mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC;
+ if (!s->TotalAvailablePipesSupportNoDSC)
+ mode_lib->ms.support.TotalAvailablePipesSupport = false;
+ mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC;
+ } else {
+ mode_lib->ms.ODMMode[k] = s->ODMModeDSC;
+ mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC;
+ if (!s->TotalAvailablePipesSupportDSC)
+ mode_lib->ms.support.TotalAvailablePipesSupport = false;
+ mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC;
+ }
+ dml2_printf("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
+ 
dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); + } + + // FIXME_DCN4 - add odm vs mpc use check + + // FIXME_DCN4 - add imall cap check + mode_lib->ms.support.incorrect_imall_usage = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) + mode_lib->ms.support.incorrect_imall_usage = 1; + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 1; + + if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 4; + } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 3; + } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 2; + } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) { + mode_lib->ms.MPCCombine[k] = true; + mode_lib->ms.NoOfDPP[k] = 2; + mode_lib->ms.TotalNumberOfActiveDPP++; + } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 1; + if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { + dml2_printf("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__); + } + } else { + if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { + mode_lib->ms.MPCCombine[k] = true; + mode_lib->ms.NoOfDPP[k] = 2; + mode_lib->ms.TotalNumberOfActiveDPP++; + } + } +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]); +#endif + } + + if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp) + mode_lib->ms.support.TotalAvailablePipesSupport = false; + + + mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0; + for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NoOfDPP[k] == 1) + mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1; + } + + //DISPCLK/DPPCLK + mode_lib->ms.WritebackRequiredDISPCLK = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable) { + mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK, + CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width, + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + mode_lib->ip.writeback_line_buffer_buffer_size)); + } + } + + mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]); + } + + mode_lib->ms.GlobalDPPCLK = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k]; + mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]); + } + + mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz)); + } + + /* Total Available OTG, HDMIFRL, DP Support Check */ + s->TotalNumberOfActiveOTG = 0; + s->TotalNumberOfActiveHDMIFRL = 0; + s->TotalNumberOfActiveDP2p0 = 0; + s->TotalNumberOfActiveDP2p0Outputs = 0; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].stream_index == k) { + s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) + s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) { + s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1; + // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup + //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) { + s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1; + //} + } + } + } + + mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg); + mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs); + mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs); + + mode_lib->ms.support.ExceededMultistreamSlots = false; + mode_lib->ms.support.LinkCapacitySupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false && + display_cfg->plane_descriptors[k].stream_index == k && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) { + 
mode_lib->ms.support.LinkCapacitySupport = false; + } + } + + mode_lib->ms.support.P2IWith420 = false; + mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false; + mode_lib->ms.support.DSC422NativeNotSupported = false; + mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false; + mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false; + mode_lib->ms.support.BPPForMultistreamNotIndicated = false; + mode_lib->ms.support.MultistreamWithHDMIOreDP = false; + mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false; + mode_lib->ms.support.NotEnoughLanesForMSO = false; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].stream_index == k && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true) + mode_lib->ms.support.P2IWith420 = true; + + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary && s->OutputBpp[k] != 0) + mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true; + if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support) + mode_lib->ms.support.DSC422NativeNotSupported = true; + + if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) && + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) || + ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) && + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0)) + mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true; + + // FIXME_STAGE2 + //if 
(display_cfg->output.OutputMultistreamEn[k] == 1) { + // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na) + // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true; + // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0) + // mode_lib->ms.support.BPPForMultistreamNotIndicated = true; + // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) { + // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0) + // mode_lib->ms.support.BPPForMultistreamNotIndicated = true; + // } + //} + + if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) { + // FIXME_STAGE2 + //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k) + // mode_lib->ms.support.MultistreamWithHDMIOreDP = true; + //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) { + // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n) + // mode_lib->ms.support.MultistreamWithHDMIOreDP = true; + //} + } + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4)) + mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true; + + if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) || + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4)) + mode_lib->ms.support.NotEnoughLanesForMSO = true; + } + } + + mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].stream_index == k && + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { + mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK( + mode_lib->ms.RequiresDSC[k], + s->PixelClockBackEnd[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + mode_lib->ms.OutputBpp[k], + mode_lib->ms.support.NumberOfDSCSlices[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout); + + if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_clocks_khz.dtbclk / 1000)) { + mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true; + } + } + } + + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].stream_index == k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) { + s->DSCFormatFactor = 2; + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) { + s->DSCFormatFactor = 1; + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { + s->DSCFormatFactor = 2; + } else { + s->DSCFormatFactor = 1; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); +#endif + if (mode_lib->ms.RequiresDSC[k] == true) { + s->PixelClockBackEndFactor = 3.0; + + if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) + s->PixelClockBackEndFactor = 12.0; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) + s->PixelClockBackEndFactor = 9.0; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) + s->PixelClockBackEndFactor = 6.0; + + mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor; + if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) { + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); + dml2_printf("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]); + dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); + dml2_printf("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); +#endif + } + } + } + } + + /* Check DSC Unit and Slices Support */ + mode_lib->ms.support.NotEnoughDSCSlices = false; + s->TotalDSCUnitsRequired = 0; + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.RequiresDSC[k] == true) { + s->NumDSCUnitRequired = 1; + + if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) + s->NumDSCUnitRequired = 4; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) + s->NumDSCUnitRequired = 3; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) + s->NumDSCUnitRequired = 2; + + if 
(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit) + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false; + s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired; + if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired) + mode_lib->ms.support.NotEnoughDSCSlices = true; + } + } + + mode_lib->ms.support.NotEnoughDSCUnits = false; + if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) { + mode_lib->ms.support.NotEnoughDSCUnits = true; + } + + /*DSC Delay per state*/ + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k], + mode_lib->ms.ODMMode[k], + mode_lib->ip.maximum_dsc_bits_per_component, + mode_lib->ms.OutputBpp[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + mode_lib->ms.support.NumberOfDSCSlices[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + s->PixelClockBackEnd[k]); + } + + // Figure out the swath and DET configuration after the num dpp per plane is figured out + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; + CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP; + + // output + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0]; + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1]; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub; + CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC; + CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY; + CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC; + CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma; + CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; + CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3]; + CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1]; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = 
&mode_lib->ms.CompressedBufferSizeInkByte; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport; + + CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); + + if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + mode_lib->ms.SurfaceSizeInMALL[k] = 0; + mode_lib->ms.support.ExceededMALLSize = 0; + } else { + CalculateSurfaceSizeInMall( + display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->soc.mall_allocated_for_dcn_mbytes, + + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.Read256BlockWidthY, + mode_lib->ms.Read256BlockWidthC, + mode_lib->ms.Read256BlockHeightY, + mode_lib->ms.Read256BlockHeightC, + mode_lib->ms.MacroTileWidthY, + mode_lib->ms.MacroTileWidthC, + mode_lib->ms.MacroTileHeightY, + mode_lib->ms.MacroTileHeightC, + + /* Output */ + mode_lib->ms.SurfaceSizeInMALL, + &mode_lib->ms.support.ExceededMALLSize); + } + + mode_lib->ms.TotalNumberOfDCCActiveDPP = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) { + mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k]; + } + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k]; + s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k]; + s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k]; + s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k]; + s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k]; + s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling; + s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + s->SurfParameters[k].VRatioChroma = 
display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch; + s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch; + s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; + s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; + s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; + s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start; + s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame; + s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k]; + s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k]; + + s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch; + s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch; + } + + CalculateVMRowAndSwath_params->display_cfg = display_cfg; + CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateVMRowAndSwath_params->myPipe = s->SurfParameters; + CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma; + CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes; + CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY; + CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes; + CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + // output + CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded; + CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12]; + CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13]; + CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height; + CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma; + CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA + CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA + CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16]; + CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17]; + CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18]; + 
CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19]; + CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20]; + CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21]; + CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22]; + CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y; + CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y; + CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c; + CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23]; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24]; + CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY; + CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC; + CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY; + CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC; + CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY; + CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC; + CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow; + CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes; + CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame; + CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip; + CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0]; + CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1]; + CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25]; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded; + CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw; + CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes; + CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26]; + CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27]; + CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28]; + CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29]; + CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30]; + CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31]; + CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32]; + CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33]; + + CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params); + + mode_lib->ms.support.PTEBufferSizeNotExceeded = true; + mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false) + mode_lib->ms.support.PTEBufferSizeNotExceeded = false; + + if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false) + mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: 
k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]); + dml2_printf("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]); +#endif + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded); + dml2_printf("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded); +#endif + + mode_lib->ms.UrgLatency = CalculateUrgentLatency( + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.do_urgent_latency_adjustment, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->ms.FabricClock, + mode_lib->ms.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + + mode_lib->ms.TripToMemory = CalculateTripToMemory( + mode_lib->ms.UrgLatency, + mode_lib->ms.FabricClock, + mode_lib->ms.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + + mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + bool cursor_not_enough_urgent_latency_hiding = 0; + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, + + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); + + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->ms.UrgLatency, + + // output + &mode_lib->ms.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, Calling 
CalculateUrgentBurstFactor\n", __func__, k); + dml2_printf("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + dml2_printf("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); +#endif + + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->ms.swath_width_luma_ub[k], + mode_lib->ms.swath_width_chroma_ub[k], + mode_lib->ms.SwathHeightY[k], + mode_lib->ms.SwathHeightC[k], + line_time_us, + mode_lib->ms.UrgLatency, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeY[k], + mode_lib->ms.DETBufferSizeC[k], + + // Output + &mode_lib->ms.UrgentBurstFactorLuma[k], + &mode_lib->ms.UrgentBurstFactorChroma[k], + &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]); + + mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding; + } + + CalculateDCFCLKDeepSleep( + display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.SwathWidthY, + mode_lib->ms.SwathWidthC, + mode_lib->ms.NoOfDPP, + mode_lib->ms.PSCL_FACTOR, + mode_lib->ms.PSCL_FACTOR_CHROMA, + mode_lib->ms.RequiredDPPCLK, + mode_lib->ms.SurfaceReadBandwidthLuma, + mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->soc.return_bus_width_bytes, + + /* Output */ + &mode_lib->ms.dcfclk_deepsleep); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].stream_index == k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay( + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK; + } else { + mode_lib->ms.WritebackDelayTime[k] = 0.0; + } + for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) { + if (display_cfg->plane_descriptors[m].stream_index == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.enable == true) { + mode_lib->ms.WritebackDelayTime[k] = math_max2(mode_lib->ms.WritebackDelayTime[k], + mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay( + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.pixel_format, + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK); + } + } + } + } + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) { + if (display_cfg->plane_descriptors[k].stream_index == m) { + mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.WritebackDelayTime[m]; + } + } + } + + // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported); + s->MaximumVStartup[k] = CalculateMaxVStartup( + mode_lib->ip.ptoi_supported, + mode_lib->ip.vblank_nom_default_us, + &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing, + mode_lib->ms.WritebackDelayTime[k]); + mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]); +#endif + + /* Immediate Flip and MALL parameters */ + s->ImmediateFlipRequired = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip; + } + + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || + ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) && + (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))); + } + + mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen || + ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) || + ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || 
display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)); + } + + s->FullFrameMALLPStateMethod = false; + s->SubViewportMALLPStateMethod = false; + s->PhantomPipeMALLPStateMethod = false; + s->SubViewportMALLRefreshGreaterThan120Hz = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame) + s->FullFrameMALLPStateMethod = true; + if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) { + s->SubViewportMALLPStateMethod = true; + if (!display_cfg->overrides.enable_subvp_implicit_pmo) { + // For dv, small frame tests will have very high refresh rate + unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 / + (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); + if (refresh_rate > 120) + s->SubViewportMALLRefreshGreaterThan120Hz = true; + } + } + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) + s->PhantomPipeMALLPStateMethod = true; + } + mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) || + (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod); + dml2_printf("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod); + dml2_printf("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod); + dml2_printf("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz); + dml2_printf("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState); +#endif + + //Re-ordering Buffer Support Check + + mode_lib->ms.support.max_non_urgent_latency_us + = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + + mode_lib->ms.support.max_urgent_latency_us + = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / 
mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 + / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)) >= mode_lib->ms.support.max_urgent_latency_us) { + mode_lib->ms.support.ROBSupport = true; + } else { + mode_lib->ms.support.ROBSupport = false; + } + } else { + if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { + mode_lib->ms.support.ROBSupport = true; + } else { + mode_lib->ms.support.ROBSupport = false; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index); + dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); + dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); + dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.max_urgent_latency_us); + dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); + dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); +#endif + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 + / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)) >= mode_lib->ms.support.max_non_urgent_latency_us) { + mode_lib->ms.support.ROBUrgencyAvoidance = true; + } else { + mode_lib->ms.support.ROBUrgencyAvoidance = false; + } + } else { + mode_lib->ms.support.ROBUrgencyAvoidance = true; + } + + mode_lib->ms.support.OutstandingRequestsSupport = true; + mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true; + + mode_lib->ms.support.avg_urgent_latency_us + = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + + mode_lib->ms.support.avg_non_urgent_latency_us + = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * 
mode_lib->ms.support.request_size_bytes_luma[k] + / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); + + if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) { + mode_lib->ms.support.OutstandingRequestsSupport = false; + } + + if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) { + mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us); + dml2_printf("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us); + dml2_printf("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]); + dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us); +#endif + } + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4 && mode_lib->ms.BytePerPixelC[k] > 0) { + outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k] + / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); + + if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) { + mode_lib->ms.support.OutstandingRequestsSupport = false; + } + + if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) { + mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]); + dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us); +#endif + } + } + + memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params)); + if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0; + mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0; + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0; + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0; + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0; + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0; + } + } else { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count; + calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes; + calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes; + calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes; + calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + + calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format; + calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle); + calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; + 
calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling; + calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall; + + calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; + calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; + calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k]; + calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k]; + calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k]; + calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k]; + calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k]; + calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k]; + + calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start; + calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; + calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k]; + calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k]; + calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k]; + calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k]; + calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k]; + calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k]; + + // output + calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k]; + calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k]; + calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k]; + calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k]; + + calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k]; + calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k]; + calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k]; + calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k]; + + calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k]; + calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k]; + calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k]; + calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k]; + + calculate_mcache_setting_params->mall_comb_mcache_l = 
&mode_lib->ms.mall_comb_mcache_l[k]; + calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k]; + calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k]; + + calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params); + } + + calculate_mall_bw_overhead_factor( + mode_lib->ms.mall_prefetch_sdp_overhead_factor, + mode_lib->ms.mall_prefetch_dram_overhead_factor, + + // input + display_cfg, + mode_lib->ms.num_active_planes); + } + + // Calculate all the bandwidth available + // Need anothe bw for latency evaluation + calculate_bandwidth_available( + mode_lib->ms.support.avg_bandwidth_available_min, // not used + mode_lib->ms.support.avg_bandwidth_available, // not used + mode_lib->ms.support.urg_bandwidth_available_min_latency, + mode_lib->ms.support.urg_bandwidth_available, // not used + mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used + mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used + + &mode_lib->soc, + display_cfg->hostvm_enable, + mode_lib->ms.DCFCLK, + mode_lib->ms.FabricClock, + mode_lib->ms.dram_bw_mbps); + + calculate_bandwidth_available( + mode_lib->ms.support.avg_bandwidth_available_min, + mode_lib->ms.support.avg_bandwidth_available, + mode_lib->ms.support.urg_bandwidth_available_min, + mode_lib->ms.support.urg_bandwidth_available, + mode_lib->ms.support.urg_bandwidth_available_vm_only, + mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, + + &mode_lib->soc, + display_cfg->hostvm_enable, + mode_lib->ms.MaxDCFCLK, + mode_lib->ms.MaxFabricClock, + mode_lib->ms.max_dram_bw_mbps); + + + // Average BW support check + calculate_avg_bandwidth_required( + mode_lib->ms.support.avg_bandwidth_required, + // input + display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->ms.SurfaceReadBandwidthLuma, + mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, + mode_lib->ms.mall_prefetch_dram_overhead_factor, + mode_lib->ms.mall_prefetch_sdp_overhead_factor); + + for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram + mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1; + mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]); + mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]); + } + + mode_lib->ms.support.AvgBandwidthSupport = true; + mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) { + mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false; + dml2_printf("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k); + + } + } + for (m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram + if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) { + 
mode_lib->ms.support.AvgBandwidthSupport = false; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n)); +#endif + } + } + } + + /* Prefetch Check */ + { + mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; + + + calculate_hostvm_inefficiency_factor( + &s->HostVMInefficiencyFactor, + &s->HostVMInefficiencyFactorPrefetch, + + display_cfg->gpuvm_enable, + display_cfg->hostvm_enable, + mode_lib->ip.remote_iommu_outstanding_translations, + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], + mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); + + mode_lib->ms.Total3dlutActive = 0; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1; + + // Calculate tdlut schedule related terms + calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK; + calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; + calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; + calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; + calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag; + calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling); + + // output + calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; + calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; + calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; + + calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); + } + + min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + + CalculateExtraLatency( + display_cfg, + mode_lib->ip.rob_buffer_size_kbytes, + 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + s->ReorderingBytes, + mode_lib->ms.DCFCLK, + mode_lib->ms.FabricClock, + mode_lib->ip.pixel_chunk_size_kbytes, + min_return_bw_for_latency, + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPP, + mode_lib->ms.dpte_group_bytes, + s->tdlut_bytes_per_group, + s->HostVMInefficiencyFactor, + s->HostVMInefficiencyFactorPrefetch, + mode_lib->soc.hostvm_min_page_size_kbytes, + mode_lib->soc.qos_parameters.qos_type, + !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.request_size_bytes_luma, + 
mode_lib->ms.support.request_size_bytes_chroma, + mode_lib->ip.meta_chunk_size_kbytes, + mode_lib->ip.dchub_arb_to_ret_delay, + mode_lib->ms.TripToMemory, + mode_lib->ip.hostvm_mode, + + // output + &mode_lib->ms.ExtraLatency, + &mode_lib->ms.ExtraLatency_sr, + &mode_lib->ms.ExtraLatencyPrefetch); + + { + mode_lib->ms.support.PrefetchSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; + + mode_lib->ms.TWait[k] = CalculateTWait( + display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, + mode_lib->ms.UrgLatency, + mode_lib->ms.TripToMemory); + + myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; + myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; + myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k]; + myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; + myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; + myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; + myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; + myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; + myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + myPipe->ODMMode = mode_lib->ms.ODMMode[k]; + myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); + dml2_printf("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); +#endif + CalculatePrefetchSchedule_params->display_cfg = display_cfg; + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k]; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal 
+ mode_lib->ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; + CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[k]; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch; + CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; + CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k]; + CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory; + CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; + CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; + 
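// remaining per-plane inputs: cursor sizing, DCC/MRQ meta row bytes and the MALL SDP prefetch overhead +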
CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); + CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; + CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; + CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; + CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; + CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; + + // output + CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k]; + CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k]; + CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; + CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; + CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; + CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; + CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; + CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; + CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; + CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; + + mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); + + mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k]; + dml2_printf("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); + dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); + } // for k num_planes + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.dst_y_prefetch[k] < 2.0 + || mode_lib->ms.LinesForVM[k] >= 32.0 + || 
mode_lib->ms.LinesForDPTERow[k] >= 16.0 + || mode_lib->ms.NoTimeForPrefetch[k] == true + || s->DSTYAfterScaler[k] > 8) { + mode_lib->ms.support.PrefetchSupported = false; + dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); + dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); + dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); + dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); + dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + } + } + + mode_lib->ms.support.DynamicMetadataSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) { + mode_lib->ms.support.DynamicMetadataSupported = false; + } + } + + mode_lib->ms.support.VRatioInPrefetchSupported = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || + mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) { + mode_lib->ms.support.VRatioInPrefetchSupported = false; + dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); + } + } + + s->AnyLinesForVMOrRowTooLarge = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.LinesForDPTERow[k] >= 16 || mode_lib->ms.LinesForVM[k] >= 32) { + s->AnyLinesForVMOrRowTooLarge = true; + } + } + + // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok + if (mode_lib->ms.support.PrefetchSupported) { + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + // Calculate Urgent burst factor for prefetch +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); + dml2_printf("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); + dml2_printf("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); +#endif + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->ms.swath_width_luma_ub[k], + mode_lib->ms.swath_width_chroma_ub[k], + mode_lib->ms.SwathHeightY[k], + mode_lib->ms.SwathHeightC[k], + line_time_us, + mode_lib->ms.UrgLatency, + mode_lib->ms.VRatioPreY[k], + mode_lib->ms.VRatioPreC[k], + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeY[k], + mode_lib->ms.DETBufferSizeC[k], + /* Output */ + &mode_lib->ms.UrgentBurstFactorLumaPre[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], + &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + + // Calculate urgent bandwidth required, both urg and non urg peak bandwidth + // assume flip bw is 0 at this point + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + mode_lib->ms.final_flip_bw[k] = 0; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + mode_lib->ms.support.urg_vactive_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_required, + 
mode_lib->ms.support.non_urg_bandwidth_required, + + display_cfg, + 0, // inc_flip_bw + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPP, + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0, + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1, + mode_lib->ms.mall_prefetch_sdp_overhead_factor, + mode_lib->ms.mall_prefetch_dram_overhead_factor, + + mode_lib->ms.SurfaceReadBandwidthLuma, + mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.dpte_row_bw, + mode_lib->ms.meta_row_bw, + mode_lib->ms.prefetch_cursor_bw, + mode_lib->ms.prefetch_vmrow_bw, + mode_lib->ms.final_flip_bw, + mode_lib->ms.UrgentBurstFactorLuma, + mode_lib->ms.UrgentBurstFactorChroma, + mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre); + + // Check urg peak bandwidth against available urg bw + // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) + check_urgent_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth + &s->dummy_single[1], // double* frac_urg_bandwidth_mall + &mode_lib->ms.support.UrgVactiveBandwidthSupport, + &mode_lib->ms.support.PrefetchBandwidthSupported, + + mode_lib->soc.mall_allocated_for_dcn_mbytes, + mode_lib->ms.support.non_urg_bandwidth_required, + mode_lib->ms.support.urg_vactive_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported; + dml2_printf("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) { + mode_lib->ms.support.PrefetchSupported = false; + dml2_printf("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + } + + + // Both prefetch schedule and BW okay + if (mode_lib->ms.support.PrefetchSupported == true && mode_lib->ms.support.VRatioInPrefetchSupported == true) { + mode_lib->ms.BandwidthAvailableForImmediateFlip = + get_bandwidth_available_for_immediate_flip(dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required, // no flip + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip) { + s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( + s->HostVMInefficiencyFactor, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.meta_row_bytes[k]); + } else { + s->per_pipe_flip_bytes[k] = 0; + } + mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; + + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateFlipSchedule( + &mode_lib->scratch, + display_cfg->plane_descriptors[k].immediate_flip, + 1, // use_lb_flip_bw + s->HostVMInefficiencyFactor, + s->Tvm_trips_flip[k], + s->Tr0_trips_flip[k], + s->Tvm_trips_flip_rounded[k], + s->Tr0_trips_flip_rounded[k], + display_cfg->gpuvm_enable, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + 
mode_lib->ms.BandwidthAvailableForImmediateFlip, + mode_lib->ms.TotImmediateFlipBytes, + display_cfg->plane_descriptors[k].pixel_format, + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ms.Tno_bw_flip[k], + mode_lib->ms.dpte_row_height[k], + mode_lib->ms.dpte_row_height_chroma[k], + mode_lib->ms.use_one_row_for_frame_flip[k], + mode_lib->ip.max_flip_time_us, + s->per_pipe_flip_bytes[k], + mode_lib->ms.meta_row_bytes[k], + s->meta_row_height_luma[k], + s->meta_row_height_chroma[k], + mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, + + /* Output */ + &mode_lib->ms.dst_y_per_vm_flip[k], + &mode_lib->ms.dst_y_per_row_flip[k], + &mode_lib->ms.final_flip_bw[k], + &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); + } + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + s->dummy_bw, + mode_lib->ms.support.urg_bandwidth_required_flip, + mode_lib->ms.support.non_urg_bandwidth_required_flip, + + // Input + display_cfg, + 1, // inc_flip_bw + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPP, + + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0, + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1, + mode_lib->ms.mall_prefetch_sdp_overhead_factor, + mode_lib->ms.mall_prefetch_dram_overhead_factor, + + mode_lib->ms.SurfaceReadBandwidthLuma, + mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.dpte_row_bw, + mode_lib->ms.meta_row_bw, + mode_lib->ms.prefetch_cursor_bw, + mode_lib->ms.prefetch_vmrow_bw, + mode_lib->ms.final_flip_bw, + mode_lib->ms.UrgentBurstFactorLuma, + mode_lib->ms.UrgentBurstFactorChroma, + mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre); + + calculate_immediate_flip_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth_flip + &mode_lib->ms.support.ImmediateFlipSupport, + + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_flip, + mode_lib->ms.support.non_urg_bandwidth_required_flip, + mode_lib->ms.support.urg_bandwidth_available); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) + mode_lib->ms.support.ImmediateFlipSupport = false; + } + + } else { // if prefetch not support, assume iflip is not supported too + mode_lib->ms.support.ImmediateFlipSupport = false; + } + } // prefetch schedule + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.use_one_row_for_frame[k] = mode_lib->ms.use_one_row_for_frame[k]; + } + + s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; + s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; + s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr; + s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; + s->mSOCParameters.DRAMClockChangeLatency = 
mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; + s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; + s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; + s->mSOCParameters.USRRetrainingLatency = 0; // FIXME_STAGE2: no USR related bbox value + s->mSOCParameters.SMNLatency = 0; // FIXME_STAGE2 + s->mSOCParameters.g6_temp_read_blackout_us = mode_lib->soc.power_management_parameters.g6_temp_read_blackout_us[in_out_params->min_clk_index]; + + CalculateWatermarks_params->display_cfg = display_cfg; + CalculateWatermarks_params->USRRetrainingRequired = false /*FIXME_STAGE2 was: mode_lib->ms.policy.USRRetrainingRequired, no new dml2 replacement*/; + CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK; + CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; + CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; + CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; + //CalculateWatermarks_params->LBBitPerPixel = 57; // FIXME_STAGE2, need a new ip param? 
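+ // per-plane swath and DPP geometry inputs; the outputs below cover the watermark set plus DRAM/FCLK clock-change, USR retraining and g6 temp read support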
+ CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; + CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; + CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte; + CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma; + CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma; + + // Output + CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport; + CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport; + CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport; + CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support; + CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin; + CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + } + + // End of Prefetch Check + + dml2_printf("DML::%s: Done prefetch calculation\n", __func__); + + /*Mode Support, Voltage State and SOC Configuration*/ + { + // s->dram_clock_change_support = 1; + // s->f_clock_change_support = 1; + + if (mode_lib->ms.support.ScaleRatioAndTapsSupport + && mode_lib->ms.support.SourceFormatPixelAndScanSupport + && mode_lib->ms.support.ViewportSizeSupport + && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion + && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated + && !mode_lib->ms.support.BPPForMultistreamNotIndicated + && !mode_lib->ms.support.MultistreamWithHDMIOreDP + && !mode_lib->ms.support.ExceededMultistreamSlots + && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink + && !mode_lib->ms.support.NotEnoughLanesForMSO + //&& mode_lib->ms.support.LinkCapacitySupport == true // FIXME_STAGE2 + && !mode_lib->ms.support.P2IWith420 + && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP + && !mode_lib->ms.support.DSC422NativeNotSupported + && !mode_lib->ms.support.NotEnoughDSCUnits + && !mode_lib->ms.support.NotEnoughDSCSlices + && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe + && 
!mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen + && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported + && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport + && !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported + && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState + && mode_lib->ms.support.ROBSupport + && mode_lib->ms.support.ROBUrgencyAvoidance + && mode_lib->ms.support.OutstandingRequestsSupport + && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance + && mode_lib->ms.support.DISPCLK_DPPCLK_Support + && mode_lib->ms.support.TotalAvailablePipesSupport + && mode_lib->ms.support.NumberOfOTGSupport + && mode_lib->ms.support.NumberOfHDMIFRLSupport + && mode_lib->ms.support.NumberOfDP2p0Support + && mode_lib->ms.support.EnoughWritebackUnits + && mode_lib->ms.support.WritebackLatencySupport + && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport + && mode_lib->ms.support.CursorSupport + && mode_lib->ms.support.PitchSupport + && !mode_lib->ms.support.ViewportExceedsSurface + && mode_lib->ms.support.PrefetchSupported + && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport + && mode_lib->ms.support.AvgBandwidthSupport + && mode_lib->ms.support.DynamicMetadataSupported + && mode_lib->ms.support.VRatioInPrefetchSupported + && mode_lib->ms.support.PTEBufferSizeNotExceeded + && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded + && !mode_lib->ms.support.ExceededMALLSize + && mode_lib->ms.support.g6_temp_read_support + && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) { + // && s->dram_clock_change_support == true + // && s->f_clock_change_support == true + // && (/*FIXME_STAGE2 was: mode_lib->ms.policy.USRRetrainingRequired, no new dml2 replacement || */ mode_lib->ms.support.USRRetrainingSupport)) { + dml2_printf("DML::%s: mode is supported\n", __func__); + mode_lib->ms.support.ModeSupport = true; + } else { + dml2_printf("DML::%s: mode is NOT supported\n", __func__); + mode_lib->ms.support.ModeSupport = false; + } + } + + // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0). 
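+ // The final verdict above is the AND of every individual support flag; a single failing check rejects the mode for this power state.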
+ dml2_printf("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport); + dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k]; + mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k]; + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].stream_index == k) { + mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k]; + } else { + mode_lib->ms.support.ODMMode[k] = dml2_odm_mode_bypass; + } + + mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k]; + mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k]; + mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k]; + mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k]; + mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k]; + +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]); + dml2_printf("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]); +#endif + } + +#if defined(__DML_VBA_DEBUG__) + if (!mode_lib->ms.support.ModeSupport) + dml2_print_dml_mode_support_info(&mode_lib->ms.support, true); + + dml2_printf("DML::%s: --- DONE --- \n", __func__); +#endif + + return mode_lib->ms.support.ModeSupport; +} + +unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params) +{ + unsigned int result; + + dml2_printf("DML::%s: ------------- START ----------\n", __func__); + result = dml_core_mode_support(in_out_params); + + if (result) + *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; + + dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); + dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); + + return result; +} + +static void CalculatePixelDeliveryTimes( + const struct dml2_display_cfg *display_cfg, + const struct core_display_cfg_support_info *cfg_support_info, + unsigned int NumberOfActiveSurfaces, + double VRatioPrefetchY[], + double VRatioPrefetchC[], + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + unsigned int BytePerPixelC[], + unsigned int req_per_swath_ub_l[], + unsigned int req_per_swath_ub_c[], + + // Output + double DisplayPipeLineDeliveryTimeLuma[], + double DisplayPipeLineDeliveryTimeChroma[], + double DisplayPipeLineDeliveryTimeLumaPrefetch[], + double DisplayPipeLineDeliveryTimeChromaPrefetch[], + double DisplayPipeRequestDeliveryTimeLuma[], + double DisplayPipeRequestDeliveryTimeChroma[], + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], + double DisplayPipeRequestDeliveryTimeChromaPrefetch[]) +{ + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + dml2_printf("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + dml2_printf("DML::%s: k=%u : HRatioChroma = %f\n", __func__, 
k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio); + dml2_printf("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); + dml2_printf("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]); + dml2_printf("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]); + dml2_printf("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); + dml2_printf("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); + dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); + dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); + dml2_printf("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used); + dml2_printf("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz); + dml2_printf("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); +#endif + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma[k] = 0; + } else { + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) { + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } + + if (VRatioPrefetchY[k] <= 1) { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (VRatioPrefetchC[k] <= 1) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); +#endif + } + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + + DisplayPipeRequestDeliveryTimeLuma[k] = 
DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k]; + DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k]; + if (BytePerPixelC[k] == 0) { + DisplayPipeRequestDeliveryTimeChroma[k] = 0; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; + } else { + DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k]; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k]; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); + dml2_printf("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); + dml2_printf("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]); +#endif + } +} + +static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p) +{ + unsigned int meta_chunk_width; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int meta_chunks_per_row_ub; + unsigned int meta_chunk_width_chroma; + unsigned int min_meta_chunk_width_chroma; + unsigned int meta_chunk_per_row_int_chroma; + unsigned int meta_row_remainder_chroma; + unsigned int meta_chunk_threshold_chroma; + unsigned int meta_chunks_per_row_ub_chroma; + unsigned int dpte_group_width_luma; + unsigned int dpte_groups_per_row_luma_ub; + unsigned int dpte_group_width_chroma; + unsigned int dpte_groups_per_row_chroma_ub; + double pixel_clock_mhz; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_META_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) { + meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k]; + min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k]; + meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width; + meta_row_remainder = p->meta_row_width[k] % meta_chunk_width; + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + meta_chunk_threshold = 2 * 
min_meta_chunk_width - p->meta_req_width[k]; + } else { + meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k]; + } + if (meta_row_remainder <= meta_chunk_threshold) { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + } else { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + } + p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio * + p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + if (p->BytePerPixelC[k] == 0) { + p->TimePerChromaMetaChunkNominal[k] = 0; + p->TimePerChromaMetaChunkVBlank[k] = 0; + p->TimePerChromaMetaChunkFlip[k] = 0; + } else { + meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k]; + min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k]; + meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma); + meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma; + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k]; + } else { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k]; + } + if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; + } else { + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; + } + p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + } + } else { + p->TimePerMetaChunkNominal[k] = 0; + 
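// DCC meta disabled or MRQ not present: the remaining meta-chunk transfer times are zeroed as well +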
p->TimePerMetaChunkVBlank[k] = 0; + p->TimePerMetaChunkFlip[k] = 0; + p->TimePerChromaMetaChunkNominal[k] = 0; + p->TimePerChromaMetaChunkVBlank[k] = 0; + p->TimePerChromaMetaChunkFlip[k] = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]); + dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]); + dml2_printf("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]); + dml2_printf("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]); + dml2_printf("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]); + dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]); + dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]); + dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]); +#endif + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k]; + else + p->time_per_tdlut_group[k] = 0; + + dml2_printf("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]); + + if (p->display_cfg->gpuvm_enable == true) { + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]); + } else { + dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]); + } + if (p->use_one_row_for_frame[k]) { + dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0)); + } else { + dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); + } +#ifdef DML_VM_PTE_ADL_PATCH_EN + if (dpte_groups_per_row_luma_ub <= 2) { + dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; + } +#endif + dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); + dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); + dml2_printf("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]); + dml2_printf("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", 
__func__, k, p->PixelPTEReqHeightY[k]); + dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + dml2_printf("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); + dml2_printf("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); + + p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + if (p->BytePerPixelC[k] == 0) { + p->time_per_pte_group_nom_chroma[k] = 0; + p->time_per_pte_group_vblank_chroma[k] = 0; + p->time_per_pte_group_flip_chroma[k] = 0; + } else { + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]); + } else { + dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]); + } + + if (p->use_one_row_for_frame[k]) { + dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0)); + } else { + dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); + } +#ifdef DML_VM_PTE_ADL_PATCH_EN + if (dpte_groups_per_row_chroma_ub <= 2) { + dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; + } +#endif + dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); + dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); + + p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + } + } else { + p->time_per_pte_group_nom_luma[k] = 0; + p->time_per_pte_group_vblank_luma[k] = 0; + p->time_per_pte_group_flip_luma[k] = 0; + p->time_per_pte_group_nom_chroma[k] = 0; + p->time_per_pte_group_vblank_chroma[k] = 0; + p->time_per_pte_group_flip_chroma[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]); + dml2_printf("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, 
p->dst_y_per_row_flip[k]); + + dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]); + dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]); +#endif + } +} // CalculateMetaAndPTETimes + +static void CalculateVMGroupAndRequestTimes( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelC[], + double dst_y_per_vm_vblank[], + double dst_y_per_vm_flip[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + unsigned int vm_group_bytes[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + unsigned int tdlut_pte_bytes_per_frame[], + unsigned int meta_pte_bytes_per_frame_ub_l[], + unsigned int meta_pte_bytes_per_frame_ub_c[], + bool mrq_present, + + // Output + double TimePerVMGroupVBlank[], + double TimePerVMGroupFlip[], + double TimePerVMRequestVBlank[], + double TimePerVMRequestFlip[]) +{ + unsigned int num_group_per_lower_vm_stage = 0; + unsigned int num_req_per_lower_vm_stage = 0; + unsigned int num_group_per_lower_vm_stage_flip; + unsigned int num_group_per_lower_vm_stage_pref; + unsigned int num_req_per_lower_vm_stage_flip; + unsigned int num_req_per_lower_vm_stage_pref; + double line_time; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); +#endif + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable); + dml2_printf("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); + dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); + dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); + dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); + dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); +#endif + + if (display_cfg->gpuvm_enable) { + if (display_cfg->gpuvm_max_page_table_levels >= 2) { + num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); + + if (BytePerPixelC[k] > 0) + num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 
+ } + + if (dcc_mrq_enable) { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + + math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)); + } else { + num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)); + } + } + + num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage; + num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage; + + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) { + num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1); + if (display_cfg->gpuvm_max_page_table_levels >= 2) + num_group_per_lower_vm_stage_pref += 1; // tdpe0 group + } + + if (display_cfg->gpuvm_max_page_table_levels >= 2) { + num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64; + if (BytePerPixelC[k] > 0) + num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k]; + } + + if (dcc_mrq_enable) { + num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64; + if (BytePerPixelC[k] > 0) + num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64; + } + + num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage; + num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage; + + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) { + num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64; + } + + line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; + +#ifdef DML_VM_PTE_ADL_PATCH_EN + if (num_group_per_lower_vm_stage_flip <= 2) { + num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage_flip + 1; + } + + if (num_group_per_lower_vm_stage_pref <= 2) { + num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage_pref + 1; + } +#endif + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; + TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; + if (num_req_per_lower_vm_stage_pref > 0) + TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; + else + TimePerVMRequestVBlank[k] = 0.0; + if (num_req_per_lower_vm_stage_flip > 0) + TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip; + else + TimePerVMRequestFlip[k] = 0.0; + + dml2_printf("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); + dml2_printf("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); + dml2_printf("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); + dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %f\n", __func__, k, num_group_per_lower_vm_stage_pref); + dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %f\n", __func__, k, num_group_per_lower_vm_stage_flip); + dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %f\n", __func__, k, num_req_per_lower_vm_stage_pref); + dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %f\n", __func__, k, num_req_per_lower_vm_stage_flip); + + if (display_cfg->gpuvm_max_page_table_levels > 2) { + TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; + 
TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; + TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; + TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; + } + + } else { + TimePerVMGroupVBlank[k] = 0; + TimePerVMGroupFlip[k] = 0; + TimePerVMRequestVBlank[k] = 0; + TimePerVMRequestFlip[k] = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); + dml2_printf("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); + dml2_printf("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); + dml2_printf("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); +#endif + } +} + +static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateStutterEfficiency_params *p) +{ + struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals; + + unsigned int TotalNumberOfActiveOTG = 0; + double SinglePixelClock = 0; + unsigned int SingleHTotal = 0; + unsigned int SingleVTotal = 0; + bool SameTiming = true; + bool FoundCriticalSurface = false; + double LastZ8StutterPeriod = 0; + + unsigned int SwathSizeCriticalSurface; + unsigned int LastChunkOfSwathSize; + unsigned int MissingPartOfLastSwathOfDETSize; + + memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) { + if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) { + l->MaximumEffectiveCompressionLuma = 2; + } else { + l->MaximumEffectiveCompressionLuma = 4; + } + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + dml2_printf("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); + dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma); +#endif + l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0; + l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma; + + if (p->ReadBandwidthSurfaceChroma[k] > 0) { + if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > 
p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) { + l->MaximumEffectiveCompressionChroma = 2; + } else { + l->MaximumEffectiveCompressionChroma = 4; + } + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); + dml2_printf("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); + dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); +#endif + l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1; + l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma; + } + } else { + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k]; + } + l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]); + } + } + + l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth; + l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); + dml2_printf("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); + dml2_printf("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); + dml2_printf("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); + dml2_printf("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); + dml2_printf("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma); + dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); + dml2_printf("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); + + dml2_printf("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); + dml2_printf("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); + dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); + dml2_printf("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); +#endif + if (l->AverageDCCZeroSizeFraction == 1) { + l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; + l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate; + + + } else if 
(l->AverageDCCZeroSizeFraction > 0) { + l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; + l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, + (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) + + (p->rob_alloc_compressed ? math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate, + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)) + : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); + + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); + dml2_printf("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); + dml2_printf("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)); +#endif + } else { + l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, + (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) + + ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); +#endif + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); + dml2_printf("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries); + dml2_printf("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); + dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); +#endif + + *p->StutterPeriod = 0; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? 
l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; + l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]); + l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); + dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); + dml2_printf("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); + dml2_printf("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); + dml2_printf("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + dml2_printf("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); +#endif + + if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) { + bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP; + + FoundCriticalSurface = true; + *p->StutterPeriod = l->DETBufferingTimeY; + l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + l->VActiveTimeCriticalSurface = (isInterlaceTiming ? 
math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k]; + l->SwathWidthYCriticalSurface = p->SwathWidthY[k]; + l->SwathHeightYCriticalSurface = p->SwathHeightY[k]; + l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k]; + l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k]; + l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k]; + l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0); + l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface); + dml2_printf("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod); + dml2_printf("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface); + dml2_printf("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface); + dml2_printf("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface); + dml2_printf("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface); + dml2_printf("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface); + dml2_printf("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface); + dml2_printf("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface); + dml2_printf("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface); + dml2_printf("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface); +#endif + } + } + } + + // for bounded req, the stutter period is calculated based only on the DET size, but during the burst there can be some return inside the ROB/compressed buffer + // the stutter period is calculated only from the DET sizing + // if (cdb + rob >= det) the stutter burst will be absorbed by the cdb + rob, which sit before decompression + // else + // the cdb + rob part will be returned at the compressed rate with urg bw (ideal bw) + // the det part will be returned at the uncompressed rate with 64B/dcfclk + // + // for unbounded req, the stutter period should be calculated as the total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer" + // should be == EffectiveCompressedBufferSize, which will be returned at a compressed rate; the rest of the stutter period comes from the DET and will be returned at the uncompressed rate with 64B/dcfclk + + l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); + dml2_printf("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0); + dml2_printf("DML::%s: 
EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); + dml2_printf("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); + dml2_printf("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); + dml2_printf("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); + dml2_printf("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); + dml2_printf("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); +#endif + + l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer + / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) + / math_max2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); + dml2_printf("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); + dml2_printf("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); + dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); +#endif + + l->TotalActiveWriteback = 0; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable) { + l->TotalActiveWriteback = l->TotalActiveWriteback + 1; + } + } + + if (l->TotalActiveWriteback == 0) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); + dml2_printf("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); + dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); +#endif + *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100; + *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100; + *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); + *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
(unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); + } else { + *p->StutterEfficiencyNotIncludingVBlank = 0.; + *p->Z8StutterEfficiencyNotIncludingVBlank = 0.; + *p->NumberOfStutterBurstsPerFrame = 0; + *p->Z8NumberOfStutterBurstsPerFrame = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); + dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + dml2_printf("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); + dml2_printf("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); + dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); +#endif + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + if (p->display_cfg->plane_descriptors[k].stream_index == k) { + if (TotalNumberOfActiveOTG == 0) { + SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total; + } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) || SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total || SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) { + SameTiming = false; + } + TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + } + } + } + + if (*p->StutterEfficiencyNotIncludingVBlank > 0) { + if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { + *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank; + } else { + *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; + } + } else { + *p->StutterEfficiency = 0; + *p->NumberOfStutterBurstsPerFrame = 0; + } + + if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) { + LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; + if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { + *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank; + } else { + *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; + } + } else { + *p->Z8StutterEfficiency = 0.; + *p->Z8NumberOfStutterBurstsPerFrame = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); + dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); + dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); + dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); + dml2_printf("DML::%s: 
StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); + dml2_printf("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); + dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); +#endif + + SwathSizeCriticalSurface = (unsigned int)(l->BytePerPixelYCriticalSurface * l->SwathHeightYCriticalSurface * math_ceil2(l->SwathWidthYCriticalSurface, l->BlockWidth256BytesYCriticalSurface)); + LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024); + MissingPartOfLastSwathOfDETSize = (unsigned int)(math_ceil2(l->DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - l->DETBufferSizeYCriticalSurface); + + *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) && + (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface); + dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); + dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); + dml2_printf("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize); + dml2_printf("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize); + dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); +#endif +} + +static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params) +{ + const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; + const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; + const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info; + struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; + struct dml2_display_cfg_programming *programming = in_out_params->programming; + + struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals; + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; + struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; + struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; + struct 
dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params; + + unsigned int j, k; + bool must_support_iflip; + + const long min_return_uclk_cycles = 83; + const long min_return_fclk_cycles = 75; + const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0; + double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0; + double max_uclk_mhz = 0; + double min_return_latency_in_DCFCLK_cycles = 0; + + dml2_printf("DML::%s: --- START --- \n", __func__); + + memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch)); + memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program)); + + s->num_active_planes = display_cfg->num_planes; + get_stream_output_bpp(s->OutputBpp, display_cfg); + + mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); + dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); + + mode_lib->mp.Dcfclk = programming->min_clocks.dcn4.active.dcfclk_khz / 1000.0; + mode_lib->mp.FabricClock = programming->min_clocks.dcn4.active.fclk_khz / 1000.0; + mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); + mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4.active.uclk_khz / 1000.0; + mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4.dpprefclk_khz / 1000.0; + s->SOCCLK = (double)programming->min_clocks.dcn4.socclk_khz / 1000; + mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); + mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table); + + for (k = 0; k < s->num_active_planes; ++k) { + switch (cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].odms_used) { + case (4): + if (cfg_support_info->plane_support_info[k].dpps_used == 1) + mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2; // FIXME_STAGE2: for mode programming same as dml2_odm_mode_split_1to2? 
+ else + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1; + break; + case (3): + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1; + break; + case (2): + if (cfg_support_info->plane_support_info[k].dpps_used == 1) + mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4; + else + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1; + break; + default: + mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass; + break; + } + } + + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; + mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4.dppclk_khz / 1000.0; + dml2_assert(mode_lib->mp.Dppclk[k] > 0); + } + + for (k = 0; k < s->num_active_planes; ++k) { + unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4.dscclk_khz / 1000.0; + dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); + } + + mode_lib->mp.Dispclk = programming->min_clocks.dcn4.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4.deepsleep_dcfclk_khz / 1000.0; + + dml2_assert(mode_lib->mp.Dcfclk > 0); + dml2_assert(mode_lib->mp.FabricClock > 0); + dml2_assert(mode_lib->mp.dram_bw_mbps > 0); + dml2_assert(mode_lib->mp.uclk_freq_mhz > 0); + dml2_assert(mode_lib->mp.GlobalDPPCLK > 0); + dml2_assert(mode_lib->mp.Dispclk > 0); + dml2_assert(mode_lib->mp.DCFCLKDeepSleep > 0); + dml2_assert(s->SOCCLK > 0); + +#ifdef __DML_VBA_DEBUG__ + // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, s->num_active_planes); + // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, s->num_active_planes); + // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, s->num_active_planes); + // dml2_printf_dml_display_cfg_output(&display_cfg->output, s->num_active_planes); + // dml2_printf_dml_display_cfg_hw_resource(&display_cfg->hw, s->num_active_planes); + + dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); + dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); + dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); + dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); + dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps); + dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); + dml2_printf("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); + for (k = 0; k < s->num_active_planes; ++k) { + dml2_printf("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); + } + dml2_printf("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); + dml2_printf("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); + dml2_printf("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); + dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + dml2_printf("DML::%s: min_clk_table min_fclk_khz = %d\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); + dml2_printf("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); + for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) { + dml2_printf("DML::%s: pipe=%d is in plane=%d\n", 
__func__, k, mode_lib->mp.pipe_plane[k]); + dml2_printf("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + } + + for (k = 0; k < s->num_active_planes; k++) + dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); +#endif + + CalculateMaxDETAndMinCompressedBufferSize( + mode_lib->ip.config_return_buffer_size_in_kbytes, + mode_lib->ip.config_return_buffer_segment_size_in_kbytes, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->ip.max_num_dpp, + display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, + display_cfg->overrides.hw.force_nom_det_size_kbytes.value, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + &s->MaxTotalDETInKByte, + &s->NomDETInKByte, + &s->MinCompressedBufferSizeInKByte); + + + PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); + + for (k = 0; k < s->num_active_planes; ++k) { + CalculateSinglePipeDPPCLKAndSCLThroughput( + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ip.max_dchub_pscl_bw_pix_per_clk, + mode_lib->ip.max_pscl_lb_bw_pix_per_clk, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps, + + /* Output */ + &mode_lib->mp.PSCL_THROUGHPUT[k], + &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k], + &mode_lib->mp.DPPCLKUsingSingleDPP[k]); + } + + for (k = 0; k < s->num_active_planes; ++k) { + CalculateBytePerPixelAndBlockSizes( + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].surface.tiling, + display_cfg->plane_descriptors[k].surface.plane0.pitch, + display_cfg->plane_descriptors[k].surface.plane1.pitch, + + // Output + &mode_lib->mp.BytePerPixelY[k], + &mode_lib->mp.BytePerPixelC[k], + &mode_lib->mp.BytePerPixelInDETY[k], + &mode_lib->mp.BytePerPixelInDETC[k], + &mode_lib->mp.Read256BlockHeightY[k], + &mode_lib->mp.Read256BlockHeightC[k], + &mode_lib->mp.Read256BlockWidthY[k], + &mode_lib->mp.Read256BlockWidthC[k], + &mode_lib->mp.MacroTileHeightY[k], + &mode_lib->mp.MacroTileHeightC[k], + &mode_lib->mp.MacroTileWidthY[k], + &mode_lib->mp.MacroTileWidthC[k], + &mode_lib->mp.surf_linear128_l[k], + &mode_lib->mp.surf_linear128_c[k]); + } + + CalculateSwathWidth( + display_cfg, + false, // ForceSingleDPP + s->num_active_planes, + mode_lib->mp.ODMMode, + mode_lib->mp.BytePerPixelY, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.Read256BlockHeightY, + mode_lib->mp.Read256BlockHeightC, + mode_lib->mp.Read256BlockWidthY, + mode_lib->mp.Read256BlockWidthC, + mode_lib->mp.surf_linear128_l, + mode_lib->mp.surf_linear128_c, + mode_lib->mp.NoOfDPP, + + /* Output */ + mode_lib->mp.req_per_swath_ub_l, + mode_lib->mp.req_per_swath_ub_c, + mode_lib->mp.SwathWidthSingleDPPY, + mode_lib->mp.SwathWidthSingleDPPC, + mode_lib->mp.SwathWidthY, + mode_lib->mp.SwathWidthC, + 
s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[] + s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[] + mode_lib->mp.swath_width_luma_ub, + mode_lib->mp.swath_width_chroma_ub); + + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); + mode_lib->mp.SurfaceReadBandwidthLuma[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + mode_lib->mp.SurfaceReadBandwidthChroma[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + dml2_printf("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); + dml2_printf("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); + } + + CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte; + CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; + CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.SurfaceReadBandwidthLuma; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.SurfaceReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0]; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1]; + 
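The SurfaceReadBandwidthLuma/Chroma terms computed a few lines up are simply the bytes fetched per line divided by the line time, scaled by the vertical scaling ratio (line time being h_total over the pixel clock in MHz). A small standalone sketch with made-up 4K-ish numbers, purely to show the shape of that expression:

#include <stdio.h>

int main(void)
{
	/* illustrative values only */
	double swath_width_px = 3840.0;  /* SwathWidthSingleDPPY */
	double bytes_per_pixel = 4.0;    /* BytePerPixelY, e.g. a 32bpp format */
	double h_total = 4400.0;
	double pixel_clock_mhz = 594.0;
	double v_ratio = 1.0;            /* no vertical scaling */

	double line_time_us = h_total / pixel_clock_mhz;
	/* bytes per line / line time, scaled by v_ratio, as in SurfaceReadBandwidthLuma[k] */
	double read_bw = swath_width_px * bytes_per_pixel / line_time_us * v_ratio;

	/* bytes per microsecond is numerically MB/s */
	printf("line time = %.3f us, luma read bandwidth ~ %.0f MB/s\n", line_time_us, read_bw);
	return 0;
}

With these numbers the line time is about 7.4 us and the plane demands roughly 2 GB/s of luma read bandwidth, before any DCC or prefetch overhead factors are applied.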
CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC; + CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l; + CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c; + CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP; + CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY; + CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC; + CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY; + CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC; + CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + // output + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l; + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0]; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1]; + CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2]; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3]; + CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY; + CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC; + CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma; + CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c; + CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b; + CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0]; + + // VBA_DELTA + // Calculate DET size, swath height here. 
In VBA, they are calculated in mode check stage + CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); + + // DSCCLK + /* + s->DSCFormatFactor = 0; + for (k = 0; k < s->num_active_planes; ++k) { + if ((display_cfg->plane_descriptors[k].stream_index != k) || !cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable) { + } else { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) + s->DSCFormatFactor = 2; + else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) + s->DSCFormatFactor = 1; + else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) + s->DSCFormatFactor = 2; + else + s->DSCFormatFactor = 1; + + s->PixelClockBackEndFactor = 3.0; + + if (mode_lib->mp.ODMMode[k] == dml2_odm_mode_combine_4to1) + s->PixelClockBackEndFactor = 12.0; + else if (mode_lib->mp.ODMMode[k] == dml2_odm_mode_combine_3to1) + s->PixelClockBackEndFactor = 9.0; + else if (mode_lib->mp.ODMMode[k] == dml2_odm_mode_combine_2to1) + s->PixelClockBackEndFactor = 6.0; + + } + #ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, DSCEnabled = %u\n", __func__, k, cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable); + dml2_printf("DML::%s: k=%u, BlendingAndTiming = %u\n", __func__, k, display_cfg->plane_descriptors[k].stream_index); + dml2_printf("DML::%s: k=%u, PixelClockBackEndFactor = %f\n", __func__, k, s->PixelClockBackEndFactor); + dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); + dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); + dml2_printf("DML::%s: k=%u, DSCCLK = %f\n", __func__, k, mode_lib->mp.DSCCLK[k]); + #endif + } + */ + + // DSC Delay + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable, + mode_lib->mp.ODMMode[k], + mode_lib->ip.maximum_dsc_bits_per_component, + s->OutputBpp[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + s->PixelClockBackEnd[k]); + } + + // Prefetch + if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) { + for (k = 0; k < s->num_active_planes; ++k) + mode_lib->mp.SurfaceSizeInTheMALL[k] = 0; + } else { + CalculateSurfaceSizeInMall( + display_cfg, + s->num_active_planes, + mode_lib->soc.mall_allocated_for_dcn_mbytes, + mode_lib->mp.BytePerPixelY, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.Read256BlockWidthY, + mode_lib->mp.Read256BlockWidthC, + mode_lib->mp.Read256BlockHeightY, + mode_lib->mp.Read256BlockHeightC, + 
mode_lib->mp.MacroTileWidthY, + mode_lib->mp.MacroTileWidthC, + mode_lib->mp.MacroTileHeightY, + mode_lib->mp.MacroTileHeightC, + + /* Output */ + mode_lib->mp.SurfaceSizeInTheMALL, + &s->dummy_boolean[0]); /* bool *ExceededMALLSize */ + } + + for (k = 0; k < s->num_active_planes; ++k) { + s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k]; + s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k]; + s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k]; + s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k]; + s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k]; + s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k]; + s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k]; + s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k]; + s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k]; + s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling; + s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k]; + s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k]; + s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch; + s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch; + s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; + s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; + s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; + s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = 
display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame; + s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k]; + s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k]; + s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch; + s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch; + } + + CalculateVMRowAndSwath_params->display_cfg = display_cfg; + CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters; + CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma; + CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes; + CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY; + CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes; + CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + // output + CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; + CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub; + CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub; + CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height; + CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma; + CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear; + CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma; + CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes; + CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; + CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY; + CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY; + CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY; + CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC; + CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC; + CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC; + CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y; + CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y; + CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c; + CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c; + CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY; + CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC; + CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY; + 
CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC; + CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY; + CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC; + CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; + CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow; + CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes; + CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame; + CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip; + CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss; + CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE; + CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1]; + CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw; + CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes; + CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width; + CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height; + CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width; + CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l; + CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma; + CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma; + CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma; + CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c; + + CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params); + + memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params)); + if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) { + for (k = 0; k < s->num_active_planes; k++) { + mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0; + mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0; + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0; + mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0; + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0; + mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0; + } + } else { + for (k = 0; k < s->num_active_planes; k++) { + calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count; + calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes; + calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes; + calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes; + calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + + 
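In the branch above, a factor of 1.0 is the "no overhead" default: when no MALL is allocated to DCN (or MRQ is present) the MALL-prefetch and DCC DRAM overhead factors stay at unity, otherwise the mcache/MALL helpers fill in per-plane values. A toy illustration of how such a multiplicative factor would inflate a bandwidth demand, with made-up numbers; how the factors are consumed elsewhere in the model is assumed here, not shown in this hunk:

#include <stdio.h>

int main(void)
{
	/* made-up numbers purely for illustration */
	double nominal_dram_demand_mbps = 2074.0;  /* e.g. the luma read bandwidth */
	double dcc_dram_bw_nom_overhead = 1.0;     /* 1.0 == no DCC metadata overhead */
	double mall_prefetch_dram_overhead = 1.10; /* hypothetical 10% prefetch overhead */

	double adjusted_demand = nominal_dram_demand_mbps *
				 dcc_dram_bw_nom_overhead *
				 mall_prefetch_dram_overhead;

	printf("demand with overheads applied ~ %.0f MB/s\n", adjusted_demand);
	return 0;
}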
calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
+ calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
+ calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+ calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
+ calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
+
+ calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+ calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+ calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+ calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+ calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k];
+ calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k];
+ calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
+ calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
+ calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
+ calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k];
+
+ calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
+ calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
+ calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+ calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k];
+ calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k];
+ calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
+ calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
+ calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
+ calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k];
+
+ // output
+ calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k];
+ calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k];
+ calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k];
+ calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k];
+
+ calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k];
+ calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k];
+ calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k];
+ calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k];
+
+ calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k];
+ calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k];
+ calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k];
+ calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k];
+
+ calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k];
+ calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k];
+ calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k];
+ calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
+ }
+
+ calculate_mall_bw_overhead_factor(
+ mode_lib->mp.mall_prefetch_sdp_overhead_factor,
+ mode_lib->mp.mall_prefetch_dram_overhead_factor,
+
+ // input
+ display_cfg,
+ s->num_active_planes);
+ }
+
+ // Calculate all the bandwidth available
+ calculate_bandwidth_available(
+ mode_lib->mp.avg_bandwidth_available_min,
+ mode_lib->mp.avg_bandwidth_available,
+ mode_lib->mp.urg_bandwidth_available_min,
+ mode_lib->mp.urg_bandwidth_available,
+ mode_lib->mp.urg_bandwidth_available_vm_only,
+ mode_lib->mp.urg_bandwidth_available_pixel_and_vm,
+
+ &mode_lib->soc,
+ display_cfg->hostvm_enable,
+ mode_lib->mp.Dcfclk,
+ mode_lib->mp.FabricClock,
+ mode_lib->mp.dram_bw_mbps);
+
+
+ calculate_hostvm_inefficiency_factor(
+ &s->HostVMInefficiencyFactor,
+ &s->HostVMInefficiencyFactorPrefetch,
+
+ display_cfg->gpuvm_enable,
+ display_cfg->hostvm_enable,
+ mode_lib->ip.remote_iommu_outstanding_translations,
+ mode_lib->soc.max_outstanding_reqs,
+ mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
+ mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
+
+ s->TotalDCCActiveDPP = 0;
+ s->TotalActiveDPP = 0;
+ for (k = 0; k < s->num_active_planes; ++k) {
+ s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k];
+ if (display_cfg->plane_descriptors[k].surface.dcc.enable)
+ s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k];
+ }
+ // Calculate tdlut schedule related terms
+ for (k = 0; k <= s->num_active_planes - 1; k++) {
+ calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk;
+ calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+ calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
+ calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
+ calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
+ calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+ calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+
+ // output
+ calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
+ calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
+ calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
+ calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
+ calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
+ calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
+
+ calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
+ }
+
+ CalculateExtraLatency(
+ display_cfg,
+
mode_lib->ip.rob_buffer_size_kbytes, + 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + s->ReorderBytes, + mode_lib->mp.Dcfclk, + mode_lib->mp.FabricClock, + mode_lib->ip.pixel_chunk_size_kbytes, + mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active], + s->num_active_planes, + mode_lib->mp.NoOfDPP, + mode_lib->mp.dpte_group_bytes, + s->tdlut_bytes_per_group, + s->HostVMInefficiencyFactor, + s->HostVMInefficiencyFactorPrefetch, + mode_lib->soc.hostvm_min_page_size_kbytes, + mode_lib->soc.qos_parameters.qos_type, + !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), + mode_lib->soc.max_outstanding_reqs, + mode_lib->mp.request_size_bytes_luma, + mode_lib->mp.request_size_bytes_chroma, + mode_lib->ip.meta_chunk_size_kbytes, + mode_lib->ip.dchub_arb_to_ret_delay, + mode_lib->mp.TripToMemory, + mode_lib->ip.hostvm_mode, + + // output + &mode_lib->mp.ExtraLatency, + &mode_lib->mp.ExtraLatency_sr, + &mode_lib->mp.ExtraLatencyPrefetch); + + mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep; + + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].stream_index == k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + mode_lib->mp.WritebackDelay[k] = + mode_lib->soc.qos_parameters.writeback.base_latency_us + + CalculateWriteBackDelay( + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk; + } else + mode_lib->mp.WritebackDelay[k] = 0; + + for (j = 0; j < s->num_active_planes; ++j) { + if (display_cfg->plane_descriptors[j].stream_index == k + && display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.enable == true) { + mode_lib->mp.WritebackDelay[k] = + math_max2( + mode_lib->mp.WritebackDelay[k], + mode_lib->soc.qos_parameters.writeback.base_latency_us + + CalculateWriteBackDelay( + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.output_height, + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk); + } + } + } + } + + for (k = 0; k < s->num_active_planes; ++k) + for (j = 0; j < s->num_active_planes; ++j) + if (display_cfg->plane_descriptors[k].stream_index == j) + mode_lib->mp.WritebackDelay[k] = mode_lib->mp.WritebackDelay[j]; + + mode_lib->mp.UrgentLatency = CalculateUrgentLatency( + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.do_urgent_latency_adjustment, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->mp.FabricClock, + mode_lib->mp.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + + mode_lib->mp.TripToMemory = CalculateTripToMemory( + mode_lib->mp.UrgentLatency, + mode_lib->mp.FabricClock, + mode_lib->mp.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + + mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); + + mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory( + mode_lib->mp.UrgentLatency, + mode_lib->mp.FabricClock, + mode_lib->mp.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.meta_trip_adder_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + + for (k = 0; k < s->num_active_planes; ++k) { + bool cursor_not_enough_urgent_latency_hiding = 0; + double line_time_us; + + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, + + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); + + line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / 
((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->mp.UrgentLatency, + + // output + &mode_lib->mp.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k]; + + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->mp.swath_width_luma_ub[k], + mode_lib->mp.swath_width_chroma_ub[k], + mode_lib->mp.SwathHeightY[k], + mode_lib->mp.SwathHeightC[k], + line_time_us, + mode_lib->mp.UrgentLatency, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->mp.BytePerPixelInDETY[k], + mode_lib->mp.BytePerPixelInDETC[k], + mode_lib->mp.DETBufferSizeY[k], + mode_lib->mp.DETBufferSizeC[k], + + /* output */ + &mode_lib->mp.UrgentBurstFactorLuma[k], + &mode_lib->mp.UrgentBurstFactorChroma[k], + &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); + + mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding; + } + + for (k = 0; k < s->num_active_planes; ++k) { + s->MaxVStartupLines[k] = CalculateMaxVStartup( + mode_lib->ip.ptoi_supported, + mode_lib->ip.vblank_nom_default_us, + &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing, + mode_lib->mp.WritebackDelay[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + dml2_printf("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]); +#endif + } + + s->immediate_flip_required = false; + for (k = 0; k < s->num_active_planes; ++k) { + s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); +#endif + + { + s->DestinationLineTimesForPrefetchLessThan2 = false; + s->VRatioPrefetchMoreThanMax = false; + + dml2_printf("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__); + + for (k = 0; k < s->num_active_planes; ++k) { + struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; + + dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + mode_lib->mp.TWait[k] = CalculateTWait( + display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, + mode_lib->mp.UrgentLatency, + mode_lib->mp.TripToMemory); + + myPipe->Dppclk = mode_lib->mp.Dppclk[k]; + myPipe->Dispclk = mode_lib->mp.Dispclk; + myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep; + myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k]; + myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; + myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + 
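For context on the arithmetic a few statements above: the line time that feeds the cursor and urgent-burst-factor calculations is simply the horizontal total divided by the pixel rate, expressed in microseconds. A minimal standalone sketch of that conversion, with a hypothetical helper name and simplified types (illustrative only, not part of this patch):

#include <stdio.h>

/* Line time in microseconds: pixel_clock_khz / 1000 is the pixel rate in MHz,
 * i.e. pixels per microsecond, so h_total divided by it is the scanline period. */
static double example_line_time_us(unsigned int h_total, unsigned int pixel_clock_khz)
{
	return (double)h_total / ((double)pixel_clock_khz / 1000.0);
}

int main(void)
{
	/* e.g. a 4400-pixel horizontal total at a 594000 kHz pixel clock -> ~7.41 us */
	printf("line time = %.3f us\n", example_line_time_us(4400, 594000));
	return 0;
}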
myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; + myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; + myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; + myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; + myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + myPipe->ODMMode = mode_lib->mp.ODMMode[k]; + myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); +#endif + CalculatePrefetchSchedule_params->display_cfg = display_cfg; + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k]; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k]; + CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k]; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = 
display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency; + CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch; + CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc; + CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k]; + CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory; + CalculatePrefetchSchedule_params->Turg = mode_lib->mp.UrgentLatency; + CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; + CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; + CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); + CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; + CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; + CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; + CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k]; + CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k]; + + // output + CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k]; + CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k]; + CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k]; + 
CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k];
+ CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k];
+ CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k];
+ CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k];
+ CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k];
+ CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k];
+ CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k];
+ CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k];
+ CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k];
+ CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
+ CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
+ CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
+ CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
+ CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
+ CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
+ CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k];
+ CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k];
+ CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k];
+ CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k];
+
+ mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
+#endif
+ mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k];
+ } // for k
+
+ mode_lib->mp.PrefetchModeSupported = true;
+ for (k = 0; k < s->num_active_planes; ++k) {
+ if (mode_lib->mp.NoTimeToPrefetch[k] == true ||
+ mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] ||
+ mode_lib->mp.DSTYAfterScaler[k] > 8) {
+ dml2_printf("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
+ dml2_printf("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]);
+ dml2_printf("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 8)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]);
+ mode_lib->mp.PrefetchModeSupported = false;
+ }
+ if (mode_lib->mp.dst_y_prefetch[k] < 2)
+ s->DestinationLineTimesForPrefetchLessThan2 = true;
+
+ if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
+ mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__)
+ s->VRatioPrefetchMoreThanMax = true;
+
+ if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) {
+ dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
+ mode_lib->mp.PrefetchModeSupported = false;
+ }
+ }
+
+ if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) {
+ dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
+ dml2_printf("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
+ mode_lib->mp.PrefetchModeSupported = false;
+ }
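The checks just above fold the per-plane prefetch results into the single PrefetchModeSupported flag: any plane that cannot complete its prefetch, misses the dynamic-metadata deadline, pushes DSTYAfterScaler past 8 lines, gets fewer than two destination line times for prefetch, exceeds the maximum prefetch V-ratio, or cannot hide urgent latency rejects the whole schedule. A simplified sketch of that reduction, using hypothetical types rather than the driver's structures (illustrative only):

#include <stdbool.h>

/* Hypothetical per-plane prefetch results, trimmed to the fields tested above. */
struct example_plane_prefetch {
	bool no_time_to_prefetch;
	bool not_enough_time_for_dynamic_metadata;
	bool not_enough_urgent_latency_hiding;
	unsigned int dst_y_after_scaler;	/* destination lines consumed after the scaler */
	double dst_y_prefetch;			/* destination line times granted to prefetch */
	double vratio_prefetch_y;
	double vratio_prefetch_c;
};

/* One failing plane disqualifies the whole configuration. */
static bool example_prefetch_mode_supported(const struct example_plane_prefetch *planes,
					    unsigned int num_planes, double max_vratio_pre)
{
	for (unsigned int k = 0; k < num_planes; k++) {
		if (planes[k].no_time_to_prefetch ||
		    planes[k].not_enough_time_for_dynamic_metadata ||
		    planes[k].not_enough_urgent_latency_hiding ||
		    planes[k].dst_y_after_scaler > 8 ||
		    planes[k].dst_y_prefetch < 2.0 ||
		    planes[k].vratio_prefetch_y > max_vratio_pre ||
		    planes[k].vratio_prefetch_c > max_vratio_pre)
			return false;
	}
	return true;
}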
+ + dml2_printf("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__, + mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup); + + // Prefetch schedule OK, now check prefetch bw + if (mode_lib->mp.PrefetchModeSupported == true) { + for (k = 0; k < s->num_active_planes; ++k) { + double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->mp.swath_width_luma_ub[k], + mode_lib->mp.swath_width_chroma_ub[k], + mode_lib->mp.SwathHeightY[k], + mode_lib->mp.SwathHeightC[k], + line_time_us, + mode_lib->mp.UrgentLatency, + mode_lib->mp.VRatioPrefetchY[k], + mode_lib->mp.VRatioPrefetchC[k], + mode_lib->mp.BytePerPixelInDETY[k], + mode_lib->mp.BytePerPixelInDETC[k], + mode_lib->mp.DETBufferSizeY[k], + mode_lib->mp.DETBufferSizeC[k], + /* Output */ + &mode_lib->mp.UrgentBurstFactorLumaPre[k], + &mode_lib->mp.UrgentBurstFactorChromaPre[k], + &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + dml2_printf("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]); + dml2_printf("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]); + dml2_printf("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]); + dml2_printf("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]); + + dml2_printf("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]); + dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + + dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); + dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); + dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); + dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); + dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); + dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]); + dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]); + dml2_printf("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]); +#endif + } + + for (k = 0; k <= s->num_active_planes - 1; k++) + mode_lib->mp.final_flip_bw[k] = 0; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + mode_lib->mp.urg_vactive_bandwidth_required, + mode_lib->mp.urg_bandwidth_required, + mode_lib->mp.non_urg_bandwidth_required, + + // Input + display_cfg, + 0, // inc_flip_bw + s->num_active_planes, + mode_lib->mp.NoOfDPP, + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0, + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1, + 
mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0,
+ mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1,
+ mode_lib->mp.mall_prefetch_sdp_overhead_factor,
+ mode_lib->mp.mall_prefetch_dram_overhead_factor,
+ mode_lib->mp.SurfaceReadBandwidthLuma,
+ mode_lib->mp.SurfaceReadBandwidthChroma,
+ mode_lib->mp.RequiredPrefetchPixelDataBWLuma,
+ mode_lib->mp.RequiredPrefetchPixelDataBWChroma,
+ mode_lib->mp.cursor_bw,
+ mode_lib->mp.dpte_row_bw,
+ mode_lib->mp.meta_row_bw,
+ mode_lib->mp.prefetch_cursor_bw,
+ mode_lib->mp.prefetch_vmrow_bw,
+ mode_lib->mp.final_flip_bw,
+ mode_lib->mp.UrgentBurstFactorLuma,
+ mode_lib->mp.UrgentBurstFactorChroma,
+ mode_lib->mp.UrgentBurstFactorCursor,
+ mode_lib->mp.UrgentBurstFactorLumaPre,
+ mode_lib->mp.UrgentBurstFactorChromaPre,
+ mode_lib->mp.UrgentBurstFactorCursorPre);
+
+ // Check urg peak bandwidth against available urg bw
+ // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
+ check_urgent_bandwidth_support(
+ &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth
+ &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall
+ &s->dummy_boolean[1], // vactive bw ok
+ &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok
+
+ mode_lib->soc.mall_allocated_for_dcn_mbytes,
+ mode_lib->mp.non_urg_bandwidth_required,
+ mode_lib->mp.urg_vactive_bandwidth_required,
+ mode_lib->mp.urg_bandwidth_required,
+ mode_lib->mp.urg_bandwidth_available);
+
+ if (!mode_lib->mp.PrefetchModeSupported)
+ dml2_printf("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) {
+ dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
+ mode_lib->mp.PrefetchModeSupported = false;
+ }
+ }
+ } // prefetch schedule ok
+
+ // Prefetch schedule and prefetch bw ok, now check flip bw
+ if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw
+
+ mode_lib->mp.BandwidthAvailableForImmediateFlip =
+ get_bandwidth_available_for_immediate_flip(dml2_core_internal_soc_state_sys_active,
+ mode_lib->mp.urg_bandwidth_required, // no flip
+ mode_lib->mp.urg_bandwidth_available);
+ mode_lib->mp.TotImmediateFlipBytes = 0;
+ for (k = 0; k < s->num_active_planes; ++k) {
+ if (display_cfg->plane_descriptors[k].immediate_flip) {
+ s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(s->HostVMInefficiencyFactor,
+ mode_lib->mp.vm_bytes[k],
+ mode_lib->mp.PixelPTEBytesPerRow[k],
+ mode_lib->mp.meta_row_bytes[k]);
+ } else {
+ s->per_pipe_flip_bytes[k] = 0;
+ }
+ mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k];
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: k = %u\n", __func__, k);
+ dml2_printf("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]);
+ dml2_printf("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]);
+ dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]);
+ dml2_printf("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]);
+ dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes);
+#endif
+ }
+ for (k = 0; k < s->num_active_planes; ++k) {
+ CalculateFlipSchedule(
+ &mode_lib->scratch,
+ display_cfg->plane_descriptors[k].immediate_flip,
+ 0, // use_lb_flip_bw
+ s->HostVMInefficiencyFactor,
+ s->Tvm_trips_flip[k],
+ s->Tr0_trips_flip[k],
+ s->Tvm_trips_flip_rounded[k],
+ s->Tr0_trips_flip_rounded[k],
+ display_cfg->gpuvm_enable,
+ mode_lib->mp.vm_bytes[k],
+ mode_lib->mp.PixelPTEBytesPerRow[k],
+ mode_lib->mp.BandwidthAvailableForImmediateFlip,
+ mode_lib->mp.TotImmediateFlipBytes,
+ display_cfg->plane_descriptors[k].pixel_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->mp.Tno_bw[k],
+ mode_lib->mp.dpte_row_height[k],
+ mode_lib->mp.dpte_row_height_chroma[k],
+ mode_lib->mp.use_one_row_for_frame_flip[k],
+ mode_lib->ip.max_flip_time_us,
+ s->per_pipe_flip_bytes[k],
+ mode_lib->mp.meta_row_bytes[k],
+ mode_lib->mp.meta_row_height[k],
+ mode_lib->mp.meta_row_height_chroma[k],
+ mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
+
+ // Output
+ &mode_lib->mp.dst_y_per_vm_flip[k],
+ &mode_lib->mp.dst_y_per_row_flip[k],
+ &mode_lib->mp.final_flip_bw[k],
+ &mode_lib->mp.ImmediateFlipSupportedForPipe[k]);
+ }
+
+ calculate_peak_bandwidth_required(
+ &mode_lib->scratch,
+ s->dummy_bw,
+ mode_lib->mp.urg_bandwidth_required_flip,
+ mode_lib->mp.non_urg_bandwidth_required_flip,
+
+ // Input
+ display_cfg,
+ 1, // inc_flip_bw
+ s->num_active_planes,
+ mode_lib->mp.NoOfDPP,
+ mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0,
+ mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1,
+ mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0,
+ mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1,
+ mode_lib->mp.mall_prefetch_sdp_overhead_factor,
+ mode_lib->mp.mall_prefetch_dram_overhead_factor,
+ mode_lib->mp.SurfaceReadBandwidthLuma,
+ mode_lib->mp.SurfaceReadBandwidthChroma,
+ mode_lib->mp.RequiredPrefetchPixelDataBWLuma,
+ mode_lib->mp.RequiredPrefetchPixelDataBWChroma,
+ mode_lib->mp.cursor_bw,
+ mode_lib->mp.dpte_row_bw,
+ mode_lib->mp.meta_row_bw,
+ mode_lib->mp.prefetch_cursor_bw,
+ mode_lib->mp.prefetch_vmrow_bw,
+ mode_lib->mp.final_flip_bw,
+ mode_lib->mp.UrgentBurstFactorLuma,
+ mode_lib->mp.UrgentBurstFactorChroma,
+ mode_lib->mp.UrgentBurstFactorCursor,
+ mode_lib->mp.UrgentBurstFactorLumaPre,
+ mode_lib->mp.UrgentBurstFactorChromaPre,
+ mode_lib->mp.UrgentBurstFactorCursorPre);
+
+ calculate_immediate_flip_bandwidth_support(
+ &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip
+ &mode_lib->mp.ImmediateFlipSupported, // bool* flip_bandwidth_support_ok
+
+ dml2_core_internal_soc_state_sys_active,
+ mode_lib->mp.urg_bandwidth_required_flip,
+ mode_lib->mp.non_urg_bandwidth_required_flip,
+ mode_lib->mp.urg_bandwidth_available);
+
+ if (!mode_lib->mp.ImmediateFlipSupported)
+ dml2_printf("DML::%s: Bandwidth not sufficient for flip!\n", __func__);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) {
+ mode_lib->mp.ImmediateFlipSupported = false;
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k);
+#endif
+ }
+ }
+ } else { // flip or prefetch not supported
+ mode_lib->mp.ImmediateFlipSupported = false;
+ }
+
+ // consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm
+ must_support_iflip =
display_cfg->hostvm_enable || s->immediate_flip_required; + mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); + for (k = 0; k < s->num_active_planes; ++k) + dml2_printf("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); + dml2_printf("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); + dml2_printf("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); +#endif + dml2_printf("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); + } + + for (k = 0; k < s->num_active_planes; ++k) + dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + + if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) { + dml2_printf("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); + } else { + dml2_printf("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); + + // DCC Configuration + for (k = 0; k < s->num_active_planes; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); +#endif + CalculateDCCConfiguration( + display_cfg->plane_descriptors[k].surface.dcc.enable, + display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown, + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].surface.plane0.width, + display_cfg->plane_descriptors[k].surface.plane1.width, + display_cfg->plane_descriptors[k].surface.plane0.height, + display_cfg->plane_descriptors[k].surface.plane1.height, + s->NomDETInKByte, + mode_lib->mp.Read256BlockHeightY[k], + mode_lib->mp.Read256BlockHeightC[k], + display_cfg->plane_descriptors[k].surface.tiling, + mode_lib->mp.BytePerPixelY[k], + mode_lib->mp.BytePerPixelC[k], + mode_lib->mp.BytePerPixelInDETY[k], + mode_lib->mp.BytePerPixelInDETC[k], + display_cfg->plane_descriptors[k].composition.rotation_angle, + + /* Output */ + &mode_lib->mp.RequestLuma[k], + &mode_lib->mp.RequestChroma[k], + &mode_lib->mp.DCCYMaxUncompressedBlock[k], + &mode_lib->mp.DCCCMaxUncompressedBlock[k], + &mode_lib->mp.DCCYMaxCompressedBlock[k], + &mode_lib->mp.DCCCMaxCompressedBlock[k], + &mode_lib->mp.DCCYIndependentBlock[k], + &mode_lib->mp.DCCCIndependentBlock[k]); + } + + //Watermarks and NB P-State/DRAM Clock Change Support + s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency; + s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency; + s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr; + s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; + s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; + s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; + s->mmSOCParameters.SRExitZ8Time = 
mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; + s->mmSOCParameters.USRRetrainingLatency = 0; //0; //FIXME_STAGE2 + s->mmSOCParameters.SMNLatency = 0; //mode_lib->soc.smn_latency_us; //FIXME_STAGE2 + s->mmSOCParameters.g6_temp_read_blackout_us = mode_lib->soc.power_management_parameters.g6_temp_read_blackout_us[mode_lib->mp.active_min_uclk_dpm_index]; + + CalculateWatermarks_params->display_cfg = display_cfg; + CalculateWatermarks_params->USRRetrainingRequired = false/*FIXME_STAGE2 was: mode_lib->ms.policy.USRRetrainingRequired, no new dml2 replacement*/; + CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk; + CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; + CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = s->SOCCLK; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; + CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY; + CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC; + //CalculateWatermarks_params->LBBitPerPixel = 57; //FIXME_STAGE2 + CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY; + CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC; + CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; + CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height; + CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma; + CalculateWatermarks_params->DPPPerSurface = mode_lib->mp.NoOfDPP; + + // Output + CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark; + CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport; + CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported; + CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL; + 
CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport; + CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported; + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport; + CalculateWatermarks_params->g6_temp_read_support = &mode_lib->mp.g6_temp_read_support; + CalculateWatermarks_params->VActiveLatencyHidingMargin = 0; + CalculateWatermarks_params->VActiveLatencyHidingUs = 0; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); + mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark); + } else { + mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0; + mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0; + } + } + + dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); + dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); + + //Display Pipeline Delivery Time in Prefetch, Groups + CalculatePixelDeliveryTimes( + display_cfg, + cfg_support_info, + s->num_active_planes, + mode_lib->mp.VRatioPrefetchY, + mode_lib->mp.VRatioPrefetchC, + mode_lib->mp.swath_width_luma_ub, + mode_lib->mp.swath_width_chroma_ub, + mode_lib->mp.PSCL_THROUGHPUT, + mode_lib->mp.PSCL_THROUGHPUT_CHROMA, + mode_lib->mp.Dppclk, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.req_per_swath_ub_l, + mode_lib->mp.req_per_swath_ub_c, + + /* Output */ + mode_lib->mp.DisplayPipeLineDeliveryTimeLuma, + mode_lib->mp.DisplayPipeLineDeliveryTimeChroma, + mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch, + mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch, + mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma, + mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma, + mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch, + mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch); + + CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch; + CalculateMetaAndPTETimes_params->display_cfg = display_cfg; + CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame; + CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank; + CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip; + CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; 
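Looking back at the writeback clamp computed just above (before the pixel-delivery-time calls): the latest position at which a DRAM clock or FCLK change may end is VStartupMin converted to time through the line time, minus the corresponding writeback watermark, floored at zero. A small illustrative sketch with hypothetical parameter names (not the driver's types):

/* Hypothetical sketch: latest allowed end position, in microseconds, for a clock
 * change while writeback is active; mirrors max(0, vstartup_min * line_time - watermark). */
static double example_wb_allow_end_position_us(unsigned int vstartup_min_lines,
					       double line_time_us,
					       double writeback_watermark_us)
{
	double allow_us = (double)vstartup_min_lines * line_time_us - writeback_watermark_us;

	return allow_us > 0.0 ? allow_us : 0.0;
}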
+ CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC; + CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height; + CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma; + CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; + CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY; + CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC; + CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY; + CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY; + CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC; + CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC; + CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub; + CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub; + CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub; + CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes; + CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes; + CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width; + CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma; + CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height; + CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma; + CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width; + CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma; + CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height; + CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma; + + CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group; + CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L; + CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C; + CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma; + CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma; + CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma; + CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L; + CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C; + CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal; + 
CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank; + CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip; + + CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params); + + CalculateVMGroupAndRequestTimes( + display_cfg, + s->num_active_planes, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.dst_y_per_vm_vblank, + mode_lib->mp.dst_y_per_vm_flip, + mode_lib->mp.dpte_row_width_luma_ub, + mode_lib->mp.dpte_row_width_chroma_ub, + mode_lib->mp.vm_group_bytes, + mode_lib->mp.dpde0_bytes_per_frame_ub_l, + mode_lib->mp.dpde0_bytes_per_frame_ub_c, + s->tdlut_pte_bytes_per_frame, + mode_lib->mp.meta_pte_bytes_per_frame_ub_l, + mode_lib->mp.meta_pte_bytes_per_frame_ub_c, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + mode_lib->mp.TimePerVMGroupVBlank, + mode_lib->mp.TimePerVMGroupFlip, + mode_lib->mp.TimePerVMRequestVBlank, + mode_lib->mp.TimePerVMRequestFlip); + + // VStartup Adjustment + for (k = 0; k < s->num_active_planes; ++k) { + bool isInterlaceTiming; + + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency; + if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable) + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); +#endif + s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); + dml2_printf("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); +#endif + + mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin; + if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) { + mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin; + } + + isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported); + + // The actual positioning of the vstartup + mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]); + + s->dlg_vblank_start = ((isInterlaceTiming ? 
math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) : + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); + s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0; + s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k]; + + if (s->blank_lines_remaining < 0) { + dml2_printf("ERROR: Vstartup is larger than vblank!?\n"); + s->blank_lines_remaining = 0; + DML2_ASSERT(0); + } + mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup; + + // debug only + if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <= + (isInterlaceTiming ? + math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) : + (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) { + mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true; + } else { + mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); + dml2_printf("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); + dml2_printf("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); + dml2_printf("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); + dml2_printf("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); + dml2_printf("DML::%s: k=%u, HTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); + dml2_printf("DML::%s: k=%u, VTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); + dml2_printf("DML::%s: k=%u, VActive = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); + dml2_printf("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); + dml2_printf("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); + 
dml2_printf("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); + dml2_printf("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); +#endif + } + + //Maximum Bandwidth Used + s->TotalWRBandwidth = 0; + s->WRBandwidth = 0; + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_32) { + s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4; + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8; + } + s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth; + } + + mode_lib->mp.TotalDataReadBandwidth = 0; + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.SurfaceReadBandwidthLuma[k] + mode_lib->mp.SurfaceReadBandwidthChroma[k]; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); + dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); + dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); +#endif + } + + CalculateStutterEfficiency_params->display_cfg = display_cfg; + CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; + CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; + CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries; + CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries; + CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes; + CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth; + CalculateStutterEfficiency_params->DCFCLK = 
mode_lib->mp.Dcfclk; + CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active]; + CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b; + CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs; + CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank; + CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP; + CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; + CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; + CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; + CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY; + CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY; + CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC; + CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY; + CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY; + CalculateStutterEfficiency_params->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC; + CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC; + CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock; + CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.SurfaceReadBandwidthLuma; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.SurfaceReadBandwidthChroma; + CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; + CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw; + CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present; + + // output + CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency; + CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency; + CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod; + 
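CalculateStutterEfficiency, like the other DML2 helpers in this file, is driven through a single parameter block held in scratch memory: plain members carry the inputs and pointer members tell the callee where to write each result, which is what lets the best-case Z8 pass below repoint the outputs and invoke the same routine a second time. A minimal sketch of that calling convention, not part of the patch and with invented names:

#include <stdio.h>

/* Illustrative parameter-block convention; not the real DML2 structures. */
struct burst_calc_params {
	/* inputs */
	double bandwidth_mbyte_per_s;
	double burst_bytes;
	/* output: the callee writes through this pointer */
	double *burst_time_us;
};

static void calculate_burst_time(const struct burst_calc_params *p)
{
	/* MB/s is bytes per microsecond, so bytes / (MB/s) gives microseconds */
	*p->burst_time_us = p->burst_bytes / p->bandwidth_mbyte_per_s;
}

int main(void)
{
	double nominal_us, best_case_us;
	struct burst_calc_params p = {
		.bandwidth_mbyte_per_s = 512.0,
		.burst_bytes = 65536.0,
	};

	p.burst_time_us = &nominal_us;      /* first pass: nominal result */
	calculate_burst_time(&p);

	p.bandwidth_mbyte_per_s = 1024.0;   /* second pass: retarget the output, as the Z8 best-case run does */
	p.burst_time_us = &best_case_us;
	calculate_burst_time(&p);

	printf("nominal = %.0f us, best case = %.0f us\n", nominal_us, best_case_us);
	return 0;
}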
CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; + + // Stutter Efficiency + CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); + +#ifdef __DML_VBA_ALLOW_DELTA__ + // Calculate z8 stutter eff assuming 0 reserved space + CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0; + CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0; + + CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase; + CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase; + CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase; + CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase; + + // Stutter Efficiency + CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); +#else + mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; + mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency; + mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; + mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod; +#endif + } // PrefetchAndImmediateFlipSupported + + max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0; + min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz; + mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles; + mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles); + DML2_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); + dml2_printf("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); + dml2_printf("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); + dml2_printf("DML::%s: min_return_uclk_cycles = %d\n", __func__, min_return_uclk_cycles); + dml2_printf("DML::%s: min_return_fclk_cycles = %d\n", __func__, min_return_fclk_cycles); + dml2_printf("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); + dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); + dml2_printf("DML::%s: --- END --- \n", __func__); +#endif + return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported); +} + +bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) +{ + bool result = dml_core_mode_programming(in_out_params); + + dml2_printf("DML::%s: ------------- START ----------\n", __func__); + dml2_printf("DML::%s: result = %0d\n", __func__, result); + dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); + return result; +} + +void dml2_core_calcs_get_dpte_row_height( + unsigned int *dpte_row_height, + struct dml2_core_internal_display_mode_lib *mode_lib, + bool is_plane1, + enum dml2_source_format_class SourcePixelFormat, + enum dml2_swizzle_mode SurfaceTiling, + enum dml2_rotation_angle ScanDirection, + unsigned int 
pitch, + unsigned int GPUVMMinPageSizeKBytes) +{ + unsigned int BytePerPixelY; + unsigned int BytePerPixelC; + double BytePerPixelInDETY; + double BytePerPixelInDETC; + unsigned int BlockHeight256BytesY; + unsigned int BlockHeight256BytesC; + unsigned int BlockWidth256BytesY; + unsigned int BlockWidth256BytesC; + unsigned int MacroTileWidthY; + unsigned int MacroTileWidthC; + unsigned int MacroTileHeightY; + unsigned int MacroTileHeightC; + bool surf_linear_128_l; + bool surf_linear_128_c; + + CalculateBytePerPixelAndBlockSizes( + SourcePixelFormat, + SurfaceTiling, + pitch, + pitch, + + /* Output */ + &BytePerPixelY, + &BytePerPixelC, + &BytePerPixelInDETY, + &BytePerPixelInDETC, + &BlockHeight256BytesY, + &BlockHeight256BytesC, + &BlockWidth256BytesY, + &BlockWidth256BytesC, + &MacroTileHeightY, + &MacroTileHeightC, + &MacroTileWidthY, + &MacroTileWidthC, + &surf_linear_128_l, + &surf_linear_128_c); + + unsigned int BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY; + unsigned int BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY; + unsigned int BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY; + unsigned int MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY; + unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY; + unsigned int PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML: %s: is_plane1 = %u\n", __func__, is_plane1); + dml2_printf("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel); + dml2_printf("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); + dml2_printf("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); + dml2_printf("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); + dml2_printf("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); + dml2_printf("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); + dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); + dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); + dml2_printf("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); +#endif + unsigned int dummy_integer[21]; + + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportStationary = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCEnable = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.NumberOfDPPs = 1; + mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockHeight256Bytes = BlockHeight256Bytes; + mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockWidth256Bytes = BlockWidth256Bytes; + mode_lib->scratch.calculate_vm_and_row_bytes_params.SourcePixelFormat = SourcePixelFormat; + mode_lib->scratch.calculate_vm_and_row_bytes_params.SurfaceTiling = SurfaceTiling; + mode_lib->scratch.calculate_vm_and_row_bytes_params.BytePerPixel = BytePerPixel; + mode_lib->scratch.calculate_vm_and_row_bytes_params.RotationAngle = ScanDirection; + mode_lib->scratch.calculate_vm_and_row_bytes_params.SwathWidth = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportHeight = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportXStart = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportYStart = 0; + 
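dml2_core_calcs_get_dpte_row_height needs only one of CalculateVMAndRowBytes' many results, so the scratch parameter block is filled with zeros for inputs that do not matter and, just below, every output it does not care about is pointed at a dummy_integer scratch array so the callee can write all of its outputs unconditionally. A small sketch of that dummy-sink pattern, not part of the patch and with invented names:

#include <stdio.h>

/* Illustrative only: aim unneeded outputs at scratch storage so the callee
 * never has to test for NULL pointers. */
struct row_calc_params {
	unsigned int pitch;            /* input */
	unsigned int *row_height;      /* the output we keep */
	unsigned int *row_width_ub;    /* an output we discard */
};

static void calc_rows(const struct row_calc_params *p)
{
	*p->row_width_ub = p->pitch;                 /* placeholder arithmetic */
	*p->row_height = p->pitch >= 4096 ? 8 : 16;  /* placeholder arithmetic */
}

int main(void)
{
	unsigned int dummy[4];          /* sink, mirroring dummy_integer[] */
	unsigned int dpte_row_height;
	struct row_calc_params p = {
		.pitch = 3840,
		.row_height = &dpte_row_height,
		.row_width_ub = &dummy[0],
	};

	calc_rows(&p);
	printf("dpte_row_height = %u\n", dpte_row_height);
	return 0;
}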
mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMEnable = 1; + mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = 4; + mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = GPUVMMinPageSizeKBytes; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = PTEBufferSizeInRequests; + mode_lib->scratch.calculate_vm_and_row_bytes_params.Pitch = pitch; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileWidth = MacroTileWidth; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileHeight = MacroTileHeight; + mode_lib->scratch.calculate_vm_and_row_bytes_params.is_phantom = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCMetaPitch = 0; + mode_lib->scratch.calculate_vm_and_row_bytes_params.mrq_present = 0; + + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &dummy_integer[1]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &dummy_integer[2]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub = &dummy_integer[3]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height = dpte_row_height; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_linear = &dummy_integer[4]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &dummy_integer[5]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &dummy_integer[6]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &dummy_integer[7]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_width = &dummy_integer[8]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_height = &dummy_integer[9]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &dummy_integer[11]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &dummy_integer[12]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.PTERequestSize = &dummy_integer[13]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &dummy_integer[14]; + + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_bytes = &dummy_integer[15]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestWidth = &dummy_integer[16]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestHeight = &dummy_integer[17]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_width = &dummy_integer[18]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_height = &dummy_integer[19]; + mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &dummy_integer[20]; + + // just supply with enough parameters to calculate dpte + CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); +#endif +} + +static bool is_dual_plane(enum dml2_source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) + ret_val = 1; + + return ret_val; +} + +static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) +{ + unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; + return plane_idx; +} + +static void rq_dlg_get_wm_regs(const struct 
dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs) +{ + double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + + wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); + wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz); + wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); + wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + wm_regs->usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); + wm_regs->refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.UrgentLatency * refclk_freq_in_mhz); + wm_regs->refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.MetaTripToMemory * refclk_freq_in_mhz); + wm_regs->frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000); + wm_regs->frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000); + wm_regs->frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000); +} + +static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) +{ + if (a == 0) + return 0; + + return (unsigned int)(math_log2((float)a) - subtrahend); +} + +void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p) +{ + int dst_x_offset = (int) ((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) - + (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio); + cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? 
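rq_dlg_get_wm_regs above programs each watermark by converting a latency in microseconds into DCHUB refclk cycles (microseconds times the refclk in MHz), and the fractional urgent-bandwidth fields by scaling a ratio by 1000. A small standalone sketch with made-up values, not part of the patch:

#include <stdio.h>

/* Illustrative conversion only: microseconds -> refclk cycles. */
static unsigned int us_to_refcyc(double time_us, double refclk_mhz)
{
	return (unsigned int)(time_us * refclk_mhz); /* cycles = us * MHz */
}

int main(void)
{
	double refclk_mhz = 400.0;  /* hypothetical DCHUB refclk */

	printf("urgent wm   = %u refcyc\n", us_to_refcyc(9.5, refclk_mhz));  /* 3800 */
	printf("sr_exit wm  = %u refcyc\n", us_to_refcyc(20.0, refclk_mhz)); /* 8000 */
	printf("frac urg bw = %u\n", (unsigned int)(0.75 * 1000));           /* 750  */
	return 0;
}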
dst_x_offset : 0); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); + dml2_printf("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); + dml2_printf("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); + dml2_printf("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); + dml2_printf("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); +#endif + + cursor_dlg_regs->chunk_hdl_adjust = 3; + cursor_dlg_regs->dst_y_offset = 0; + + cursor_dlg_regs->qos_level_fixed = 8; + cursor_dlg_regs->qos_ramp_disable = 0; +} + +static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_display_mode_lib *mode_lib, + unsigned int pipe_idx) +{ + unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); + enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format; + enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling; + bool dual_plane = is_dual_plane((enum dml2_source_format_class)(source_format)); + + unsigned int pixel_chunk_bytes = 0; + unsigned int min_pixel_chunk_bytes = 0; + unsigned int dpte_group_bytes = 0; + unsigned int mpte_group_bytes = 0; + + unsigned int p1_pixel_chunk_bytes = 0; + unsigned int p1_min_pixel_chunk_bytes = 0; + unsigned int p1_dpte_group_bytes = 0; + unsigned int p1_mpte_group_bytes = 0; + + unsigned int detile_buf_plane1_addr = 0; + unsigned int detile_buf_size_in_bytes; + double stored_swath_l_bytes; + double stored_swath_c_bytes; + bool is_phantom_pipe; + + dml2_printf("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); + + pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024); + min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes); + + if (pixel_chunk_bytes == 64 * 1024) + min_pixel_chunk_bytes = 0; + + dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx)); + mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx)); + + p1_pixel_chunk_bytes = pixel_chunk_bytes; + p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; + p1_dpte_group_bytes = dpte_group_bytes; + p1_mpte_group_bytes = mpte_group_bytes; + + if (source_format == dml2_rgbe_alpha) + p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024); + + rq_regs->unbounded_request_enabled = dml_get_unbounded_request_enabled(mode_lib); + rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10); + rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10); + + if (min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_l.min_chunk_size = 0; + else + rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1); + + if (p1_min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_c.min_chunk_size = 0; + else + rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1); + + rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6); + rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6); + rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6); + rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6); + + detile_buf_size_in_bytes = 
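The RQ fields above are log2-encoded: log_and_substract_if_non_zero() stores log2 of a byte size minus a field-specific bias (10 for the chunk size, 6 for the PTE and meta group sizes, 0 for the swath height), and maps a zero size to zero. A standalone re-implementation of that encoding with hypothetical sizes, not part of the patch:

#include <math.h>
#include <stdio.h>

/* Illustrative log2-minus-bias register encoding (link with -lm). */
static unsigned int log2_minus_bias(unsigned int bytes, unsigned int bias)
{
	if (bytes == 0)
		return 0;
	return (unsigned int)(log2((double)bytes) - bias);
}

int main(void)
{
	printf("chunk_size      = %u\n", log2_minus_bias(8 * 1024, 10)); /* 8 KiB chunk -> 3 */
	printf("dpte_group_size = %u\n", log2_minus_bias(2048, 6));      /* 2 KiB group -> 5 */
	printf("swath_height    = %u\n", log2_minus_bias(4, 0));         /* 4 lines     -> 2 */
	return 0;
}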
(unsigned int)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024); + + if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) { + unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx)); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); +#endif + DML2_ASSERT(p0_pte_row_height_linear >= 8); + + rq_regs->rq_regs_l.pte_row_height_linear = (unsigned int)(math_floor2(math_log2((float)p0_pte_row_height_linear), 1) - 3); + if (dual_plane) { + unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); +#endif + if (sw_mode == dml2_sw_linear) { + DML2_ASSERT(p1_pte_row_height_linear >= 8); + } + rq_regs->rq_regs_c.pte_row_height_linear = (unsigned int)(math_floor2(math_log2((float)p1_pte_row_height_linear), 1) - 3); + } + } else { + rq_regs->rq_regs_l.pte_row_height_linear = 0; + rq_regs->rq_regs_c.pte_row_height_linear = 0; + } + + rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_l(mode_lib, pipe_idx), 0); + rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_c(mode_lib, pipe_idx), 0); + + // FIXME_DCN4, programming guide has dGPU condition + if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + + stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx); + stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx); + is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx); + + // Note: detile_buf_plane1_addr is in unit of 1KB + if (dual_plane) { + if (is_phantom_pipe) { + detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma + } else { + if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { + detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } else { + detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); + dml2_printf("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); + dml2_printf("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); + dml2_printf("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); + dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); + dml2_printf("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); +#endif + //dml2_printf_rq_regs_st(rq_regs); + dml2_printf("DML_DLG::%s: Calculation for pipe[%d] 
done\n", __func__, pipe_idx); +} + +static void rq_dlg_get_dlg_reg( + struct dml2_core_internal_scratch *s, + struct dml2_display_dlg_regs *disp_dlg_regs, + struct dml2_display_ttu_regs *disp_ttu_regs, + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_display_mode_lib *mode_lib, + const unsigned int pipe_idx) +{ + struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals; + + memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals)); + + dml2_printf("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); + + l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); + dml2_assert(l->plane_idx < DML2_MAX_PLANES); + + l->source_format = dml2_444_8; + l->odm_mode = dml2_odm_mode_bypass; + l->dual_plane = false; + l->htotal = 0; + l->hactive = 0; + l->hblank_end = 0; + l->vblank_end = 0; + l->interlaced = false; + l->pclk_freq_in_mhz = 0.0; + l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + l->ref_freq_to_pix_freq = 0.0; + + if (l->plane_idx < DML2_MAX_PLANES) { + + l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing; + l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format; + l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx]; + + l->dual_plane = is_dual_plane(l->source_format); + + l->htotal = l->timing->h_total; + l->hactive = l->timing->h_active; + l->hblank_end = l->timing->h_blank_end; + l->vblank_end = l->timing->v_blank_end; + l->interlaced = l->timing->interlaced; + l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000; + l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz; + + dml2_printf("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx); + dml2_printf("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); + dml2_printf("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz); + dml2_printf("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); + dml2_printf("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.dchub_refclk_mhz); + dml2_printf("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); + dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + dml2_printf("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); + + DML2_ASSERT(l->refclk_freq_in_mhz != 0); + DML2_ASSERT(l->pclk_freq_in_mhz != 0); + DML2_ASSERT(l->ref_freq_to_pix_freq < 4.0); + + // Need to figure out which side of odm combine we're in + // Assume the pipe instance under the same plane is in order + + if (l->odm_mode == dml2_odm_mode_bypass) { + disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq); + } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) { + // find out how many pipe are in this plane + l->num_active_pipes = mode_lib->mp.num_active_pipes; + l->first_pipe_idx_in_plane = DML2_MAX_PLANES; + l->pipe_idx_in_combine = 0; // pipe index within the plane + l->odm_combine_factor = 2; + + if (l->odm_mode == dml2_odm_mode_combine_3to1) + l->odm_combine_factor = 3; + else if (l->odm_mode == dml2_odm_mode_combine_4to1) + l->odm_combine_factor = 4; + + for (unsigned int i = 0; i < l->num_active_pipes; i++) { + if (dml_get_plane_idx(mode_lib, i) 
== l->plane_idx) { + if (i < l->first_pipe_idx_in_plane) { + l->first_pipe_idx_in_plane = i; + } + } + } + l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.) + + disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq); + dml2_printf("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); + dml2_printf("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane); + dml2_printf("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine); + dml2_printf("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor); + } + dml2_printf("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); + + DML2_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13)); + + disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19)); + disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8)); + disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits + + l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]]; + l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]); + + dml2_printf("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank); + dml2_printf("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start); + dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + + l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]); + disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0; + + dml2_printf("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); + + l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); + l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); + + dml2_printf("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler); + dml2_printf("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler); + + l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]]; + + dml2_printf("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch); + dml2_printf("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip); + dml2_printf("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip); + dml2_printf("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank); + dml2_printf("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank); + + if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && 
l->dst_y_per_row_vblank > 0) { + DML2_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); + } + + l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]]; + l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]]; + + dml2_printf("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); + dml2_printf("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); + + // Active + l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); + dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l); + + l->refcyc_per_line_delivery_pre_c = 0.0; + l->refcyc_per_line_delivery_c = 0.0; + + if (l->dual_plane) { + l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c); + dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); + } + + disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + + l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); + dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); + + l->refcyc_per_req_delivery_pre_c = 0.0; + l->refcyc_per_req_delivery_c = 0.0; + if (l->dual_plane) { + l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); + dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); + } + + // TTU - Cursor + DML2_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); + + // Assign to register structures + disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2)); + DML2_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18)); + + disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line + disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * 
l->ref_freq_to_pix_freq); // in terms of refclk + disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19)); + + dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); + dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); + dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); + dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + + disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); + disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); + + l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; + l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2)); + + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l); + disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c); + disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l); + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned 
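Most of the DLG fields above are unsigned fixed-point: the real-valued quantity is multiplied by 2^n and truncated, where n is the number of fractional bits (2 for the dst_y line counts, 10 for the per-request delivery and per-VM-request cycles, 19 for the prefetch v-ratios and ref_freq_to_pix_freq). A hedged sketch of the packing with hypothetical values, not part of the patch:

#include <stdio.h>

/* Illustrative fixed-point packing: value * 2^frac_bits, truncated. */
static unsigned int to_fixed(double value, unsigned int frac_bits)
{
	return (unsigned int)(value * (double)(1u << frac_bits));
}

int main(void)
{
	printf("dst_y_prefetch  (2 frac bits)  = %u\n", to_fixed(12.75, 2)); /* 51     */
	printf("vratio_prefetch (19 frac bits) = %u\n", to_fixed(1.5, 19));  /* 786432 */
	printf("refcyc_per_req  (10 frac bits) = %u\n", to_fixed(3.25, 10)); /* 3328   */
	return 0;
}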
int)(l->refcyc_per_pte_group_vblank_c); + disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l); + disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c); + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1); + disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1); + + l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; + l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2)); + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l); + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c); + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l); + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c); + + disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group); + disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10)); + disp_ttu_regs->qos_level_low_wm = 0; + + disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq); + + disp_ttu_regs->qos_level_flip = 14; + disp_ttu_regs->qos_level_fixed_l = 8; + disp_ttu_regs->qos_level_fixed_c = 8; + disp_ttu_regs->qos_ramp_disable_l = 0; + disp_ttu_regs->qos_ramp_disable_c = 0; + disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz); + + // CHECK for HW 
registers' range, DML2_ASSERT or clamp + DML2_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); + DML2_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); + DML2_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); + DML2_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); + if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1); + + + DML2_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); + DML2_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); + + if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) { + dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); + l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1; + } + if (l->dual_plane) { + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) { + dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); + l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1; + } + } + + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1); + if (l->dual_plane) { + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1); + } + DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); + if (l->dual_plane) { + DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); + } + + DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); + DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); + DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); + DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); + DML2_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); + DML2_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); + DML2_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); + + dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); + + } +} + +static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param) +{ + arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs; + arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max + arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * 
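Before the computed values are handed to hardware, each one is checked against its register width: fields that can legitimately saturate are clamped to 2^width - 1 as above, while fields that must always fit are only asserted. A small sketch of that saturation, not part of the patch, assuming a hypothetical 23-bit field:

#include <stdio.h>

/* Illustrative saturation of a value to an n-bit register field. */
static unsigned int clamp_to_bits(unsigned int value, unsigned int bits)
{
	unsigned int max = (1u << bits) - 1;

	return value > max ? max : value;
}

int main(void)
{
	printf("%u\n", clamp_to_bits(9000000, 23)); /* saturates to 8388607 */
	printf("%u\n", clamp_to_bits(4096, 23));    /* fits, returned as-is */
	return 0;
}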
mode_lib->soc.clk_table.dram_config.channel_count) / 4; + arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 96 : arb_param->sdpif_request_rate_limit; + arb_param->sat_level_us = 60; + arb_param->hvm_max_qos_commit_threshold = 0xf; + arb_param->hvm_min_req_outstand_commit_threshold = 0xa; + arb_param->compbuf_reserved_space_kbytes = dml_get_compbuf_reserved_space_64b(mode_lib) * 64 / 1024; + arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.config_return_buffer_segment_size_in_kbytes; + arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib); + arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); + dml2_printf("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit); + dml2_printf("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes); + dml2_printf("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req); + dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis); +#endif + +} + +void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out) +{ + rq_dlg_get_wm_regs(display_cfg, mode_lib, out); +} + +void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out) +{ + rq_dlg_get_arb_params(display_cfg, mode_lib, out); +} + +void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg, + struct dml2_core_internal_display_mode_lib *mode_lib, + struct dml2_dchub_per_pipe_register_set *out, int pipe_index) +{ + rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index); + rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index); + out->det_size = dml_get_det_buffer_size_kbytes(mode_lib, pipe_index) / mode_lib->ip.config_return_buffer_segment_size_in_kbytes; +} + +void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index) +{ + out->dcn4.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); + out->dcn4.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); + out->dcn4.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); + out->dcn4.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); +} + +void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) +{ + // out->min_clocks.dcn4.dscclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); // FIXME_STAGE2 + // out->min_clocks.dcn4.dtbclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + // out->min_clocks.dcn4.phyclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + + dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index); +} + +void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, 
+ const struct display_configuation_with_meta *display_cfg, + struct dmub_fams2_stream_static_state *fams2_programming, + enum dml2_uclk_pstate_support_method pstate_method, + int plane_index) +{ + const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index]; + const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index]; + const struct dml2_fams2_meta *stream_fams2_meta = &display_cfg->stage3.stream_fams2_meta[plane_descriptor->stream_index]; + + unsigned int i; + + /* from display configuration */ + fams2_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; + fams2_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; + fams2_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal - + stream_descriptor->timing.v_front_porch); + fams2_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal - + stream_descriptor->timing.v_front_porch - + stream_descriptor->timing.v_active); + fams2_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled; + + /* from meta */ + fams2_programming->otg_vline_time_ns = + (unsigned int)(stream_fams2_meta->otg_vline_time_us * 1000.0); + fams2_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines; + fams2_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines; + fams2_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines; + fams2_programming->drr_keepout_otg_vline = (uint16_t)(stream_fams2_meta->nom_vtotal - + stream_descriptor->timing.v_front_porch - + stream_fams2_meta->method_drr.programming_delay_otg_vlines); + fams2_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines; + fams2_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal; + + /* from core */ + fams2_programming->config.bits.min_ttu_vblank_usable = true; + for (i = 0; i < display_cfg->display_config.num_planes; i++) { + /* check if all planes support p-state in blank */ + if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index && + mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) { + fams2_programming->config.bits.min_ttu_vblank_usable = false; + break; + } + } + + switch (pstate_method) { + case dml2_uclk_pstate_support_method_vactive: + case dml2_uclk_pstate_support_method_fw_vactive_drr: + /* legacy vactive */ + fams2_programming->type = FAMS2_STREAM_TYPE_VACTIVE; + fams2_programming->sub_state.legacy.vactive_det_fill_delay_otg_vlines = + (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; + fams2_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline; + fams2_programming->allow_end_otg_vline = + (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline; + fams2_programming->config.bits.clamp_vtotal_min = true; + break; + case dml2_uclk_pstate_support_method_vblank: + case dml2_uclk_pstate_support_method_fw_vblank_drr: + /* legacy vblank */ + fams2_programming->type = FAMS2_STREAM_TYPE_VBLANK; + fams2_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline; + fams2_programming->allow_end_otg_vline = + 
(uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline; + fams2_programming->config.bits.clamp_vtotal_min = true; + break; + case dml2_uclk_pstate_support_method_fw_drr: + /* drr */ + fams2_programming->type = FAMS2_STREAM_TYPE_DRR; + fams2_programming->sub_state.drr.programming_delay_otg_vlines = + (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines; + fams2_programming->sub_state.drr.nom_stretched_vtotal = + (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal; + fams2_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline; + fams2_programming->allow_end_otg_vline = + (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline; + /* drr only clamps to vtotal min for single display */ + fams2_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1; + fams2_programming->sub_state.drr.only_stretch_if_required = true; + break; + case dml2_uclk_pstate_support_method_fw_subvp_phantom: + case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: + /* subvp */ + fams2_programming->type = FAMS2_STREAM_TYPE_SUBVP; + fams2_programming->sub_state.subvp.vratio_numerator = + (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0); + fams2_programming->sub_state.subvp.vratio_denominator = 1000; + fams2_programming->sub_state.subvp.programming_delay_otg_vlines = + (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines; + fams2_programming->sub_state.subvp.prefetch_to_mall_otg_vlines = + (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines; + fams2_programming->sub_state.subvp.phantom_vtotal = + (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal; + fams2_programming->sub_state.subvp.phantom_vactive = + (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive; + fams2_programming->sub_state.subvp.config.bits.is_multi_planar = + plane_descriptor->surface.plane1.height > 0; + fams2_programming->sub_state.subvp.config.bits.is_yuv420 = + plane_descriptor->pixel_format == dml2_420_8 || + plane_descriptor->pixel_format == dml2_420_10 || + plane_descriptor->pixel_format == dml2_420_12; + + fams2_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline; + fams2_programming->allow_end_otg_vline = + (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline; + fams2_programming->config.bits.clamp_vtotal_min = true; + break; + case dml2_uclk_pstate_support_method_reserved_hw: + case dml2_uclk_pstate_support_method_reserved_fw: + case dml2_uclk_pstate_support_method_reserved_fw_drr_fixed: + case dml2_uclk_pstate_support_method_reserved_fw_drr_var: + case dml2_uclk_pstate_support_method_not_supported: + case dml2_uclk_pstate_support_method_count: + default: + /* this should never happen */ + break; + } +} + +void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx) +{ + unsigned int n; + + out->num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, plane_idx); + out->num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, plane_idx); + out->shift_granularity.p0 = dml_get_plane_mcache_shift_granularity_plane0(mode_lib, plane_idx); + out->shift_granularity.p1 = dml_get_plane_mcache_shift_granularity_plane1(mode_lib, plane_idx); + + for (n = 0; n < out->num_mcaches_plane0; n++) + out->mcache_x_offsets_plane0[n] = 
dml_get_plane_array_mcache_offsets_plane0(mode_lib, plane_idx, n); + + for (n = 0; n < out->num_mcaches_plane1; n++) + out->mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, plane_idx, n); + + out->last_slice_sharing.mall_comb_mcache_p0 = dml_get_plane_mall_comb_mcache_l(mode_lib, plane_idx); + out->last_slice_sharing.mall_comb_mcache_p1 = dml_get_plane_mall_comb_mcache_c(mode_lib, plane_idx); + out->last_slice_sharing.plane0_plane1 = dml_get_plane_lc_comb_mcache(mode_lib, plane_idx); + out->informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, plane_idx); + out->informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, plane_idx); + + out->valid = true; +} + +void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index) +{ + *out = dml_get_surface_size_in_mall_bytes(mode_lib, pipe_index); +} + +void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx) +{ + out->mall_svp_size_requirement_ways = 0; + + out->nominal_vblank_pstate_latency_hiding_us = + (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]); + + out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx]; + + out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx]; +} + +void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index) +{ + double phantom_processing_delay_pix; + unsigned int phantom_processing_delay_lines; + unsigned int phantom_v_active_lines; + unsigned int phantom_v_startup_lines; + unsigned int phantom_v_blank_lines; + unsigned int main_v_blank_lines; + unsigned int rem; + + phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) * + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000)); + phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total); + dml2_core_shared_div_rem(phantom_processing_delay_pix, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total, + &rem); + if (rem) + phantom_processing_delay_lines++; + + phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); + phantom_v_active_lines = phantom_processing_delay_lines + dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) + mode_lib->ip.subvp_swath_height_margin_lines; + + // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank) + phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1; + main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active; + 
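+ /*
+  * Phantom stream sizing: on top of the MALL sub-viewport lines, the phantom
+  * vactive adds enough lines to cover the firmware processing delay plus the
+  * p-state allow width (converted from microseconds to lines via the pixel
+  * clock and htotal), plus a swath height margin. As a rough illustration with
+  * hypothetical numbers: a 30 us combined delay at a 600 MHz pixel clock spans
+  * 18000 pixels, which for an htotal of 4400 rounds up to 5 extra lines.
+  * The phantom vblank computed above is clamped below so that it never exceeds
+  * the main stream's vblank.
+  */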
if (phantom_v_blank_lines > main_v_blank_lines) + phantom_v_blank_lines = main_v_blank_lines; + + out->phantom_v_active = phantom_v_active_lines; + // phantom_vtotal = vactive + vblank + out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines; + + out->phantom_min_v_active = dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index); + out->phantom_v_startup = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); + + out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us); + dml2_printf("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us); + dml2_printf("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines); + dml2_printf("DML::%s: vblank_reserved_time_us = %f\n", __func__, out->vblank_reserved_time_us); +#endif +} + +void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out) +{ + unsigned int k, n; + + out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport; + out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport; + out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport; + out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport; + out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport; + out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420; + out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP; + out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported; + out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion; + out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated; + out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated; + out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP; + out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink; + out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO; + out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport; + out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport; + out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support; + out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport; + out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport; + out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport; + out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface; + 
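+ /* Immediate flip, MALL usage, DSC and link capacity related support flags */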
out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false; + out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe; + out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen; + out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState; + out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize; + out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits; + + out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots; + out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits; + out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices; + out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport; + out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported; + out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported; + out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport; + + out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport; + out->informative.mode_support_info.ROBUrgencyAvoidance = mode_lib->ms.support.ROBUrgencyAvoidance; + out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport; + out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance; + out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded; + out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded; + + out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport; + out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport; + out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport; + + out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported; + out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported; + out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported; + out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support; + out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport; + out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport; + + for (k = 0; k < out->display_config.num_planes; k++) { + + out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k]; + out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k]; + out->informative.mode_support_info.ODMMode[k] = 
mode_lib->ms.support.ODMMode[k]; + out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k]; + out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k]; + out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k]; + out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k]; + out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k]; + + if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl; + + if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4) + 
out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4; + + out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k]; + out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k]; + } + + out->informative.watermarks.urgent_us = dml_get_wm_urgent(mode_lib); + out->informative.watermarks.writeback_urgent_us = dml_get_wm_writeback_urgent(mode_lib); + out->informative.watermarks.writeback_pstate_us = dml_get_wm_writeback_dram_clock_change(mode_lib); + out->informative.watermarks.writeback_fclk_pstate_us = dml_get_wm_writeback_fclk_change(mode_lib); + + out->informative.watermarks.cstate_exit_us = dml_get_wm_stutter_exit(mode_lib); + out->informative.watermarks.cstate_enter_plus_exit_us = dml_get_wm_stutter_enter_exit(mode_lib); + out->informative.watermarks.z8_cstate_exit_us = dml_get_wm_z8_stutter_exit(mode_lib); + out->informative.watermarks.z8_cstate_enter_plus_exit_us = dml_get_wm_z8_stutter_enter_exit(mode_lib); + out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib); + out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib); + out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib); + out->informative.watermarks.g6_temp_read_watermark_us = dml_get_wm_g6_temp_read(mode_lib); + + out->informative.mall.total_surface_size_in_mall_bytes = 0; + for (k = 0; k < out->display_config.num_planes; ++k) + out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k]; + + out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk; + out->informative.qos.urgent_latency_us = dml_get_urgent_latency(mode_lib); + + out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib); + out->informative.qos.max_urgent_latency_us = dml_get_max_urgent_latency_us(mode_lib); + out->informative.qos.avg_non_urgent_latency_us = dml_get_avg_non_urgent_latency_us(mode_lib); + out->informative.qos.avg_urgent_latency_us = dml_get_avg_urgent_latency_us(mode_lib); + + out->informative.qos.wm_memory_trip_us = dml_get_wm_memory_trip(mode_lib); + out->informative.qos.meta_trip_memory_us = dml_get_meta_trip_memory_us(mode_lib); + out->informative.qos.fraction_of_urgent_bandwidth = dml_get_fraction_of_urgent_bandwidth(mode_lib); + out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(mode_lib); + out->informative.qos.fraction_of_urgent_bandwidth_mall = dml_get_fraction_of_urgent_bandwidth_mall(mode_lib); + + out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_required_sdp(mode_lib); + out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_required_dram(mode_lib); + out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_required_sdp(mode_lib); + out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_required_dram(mode_lib); + + out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_available_sdp(mode_lib); + out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_available_dram(mode_lib); + 
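+ /* The svp_prefetch entries report the same bandwidth metrics for the SubVP (phantom pipe) prefetch state */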
out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_available_sdp(mode_lib); + out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_available_dram(mode_lib); + + out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_available_sdp(mode_lib); + out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_available_dram(mode_lib); + out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = dml_get_sys_active_urg_bw_available_dram_vm_only(mode_lib); + + out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_available_sdp(mode_lib); + out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram(mode_lib); + out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram_vm_only(mode_lib); + + out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp(mode_lib); + out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram(mode_lib); + out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp(mode_lib); + out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram(mode_lib); + + out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp(mode_lib); + out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram(mode_lib); + out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp(mode_lib); + out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram(mode_lib); + + out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp_flip(mode_lib); + out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram_flip(mode_lib); + out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp_flip(mode_lib); + out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram_flip(mode_lib); + + out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp_flip(mode_lib); + out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram_flip(mode_lib); + out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp_flip(mode_lib); + out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram_flip(mode_lib); + + out->informative.crb.comp_buffer_size_kbytes = dml_get_comp_buffer_size_kbytes(mode_lib); + out->informative.crb.UnboundedRequestEnabled = dml_get_unbounded_request_enabled(mode_lib); + + out->informative.crb.compbuf_reserved_space_64b = dml_get_compbuf_reserved_space_64b(mode_lib); + out->informative.misc.hw_debug5 = dml_get_hw_debug5(mode_lib); + out->informative.misc.dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib); + + 
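+ /* Stutter (self-refresh) efficiency statistics, including the Z8 and best-case Z8 variants */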
out->informative.power_management.stutter_efficiency = dml_get_stutter_efficiency_no_vblank(mode_lib); + out->informative.power_management.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib); + out->informative.power_management.stutter_num_bursts = dml_get_stutter_num_bursts(mode_lib); + + out->informative.power_management.z8.stutter_efficiency = dml_get_stutter_efficiency_z8(mode_lib); + out->informative.power_management.z8.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib); + out->informative.power_management.z8.stutter_num_bursts = dml_get_stutter_num_bursts_z8(mode_lib); + out->informative.power_management.z8.stutter_period = dml_get_stutter_period(mode_lib); + + out->informative.power_management.z8.bestcase.stutter_efficiency = dml_get_stutter_efficiency_z8_bestcase(mode_lib); + out->informative.power_management.z8.bestcase.stutter_num_bursts = dml_get_stutter_num_bursts_z8_bestcase(mode_lib); + out->informative.power_management.z8.bestcase.stutter_period = dml_get_stutter_period_bestcase(mode_lib); + + out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib); + + out->min_clocks.dcn4.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib); + + out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib); + + for (k = 0; k < out->display_config.num_planes; k++) { + + if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) + out->informative.misc.PrefetchMode[k] = 0; + else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) + out->informative.misc.PrefetchMode[k] = 1; + else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us) + out->informative.misc.PrefetchMode[k] = 2; + else + out->informative.misc.PrefetchMode[k] = 3; + + out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k]; + out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k]; + out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k]; + out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k]; + out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k]; + out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k]; + out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k]; + out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k]; + out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k]; + out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k]; + 
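+ /* Per-plane DCC block configuration and detailed DCHUB request/delivery timing */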
out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k]; + out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k]; + out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k]; + out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k]; + out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k]; + out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k]; + out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k]; + out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k]; + out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k]; + out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k]; + out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]; + out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k]; + out->informative.plane_info[k].swath_width_plane0 = mode_lib->mp.SwathWidthY[k]; + out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k]; + out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k]; + out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k]; + out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k]; + out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k]; + out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k]; + out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k]; + out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k]; + out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k]; + out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k]; + out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k]; + out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k]; + out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k]; + out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k]; + out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k]; + out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k]; + out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k]; + + out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k]; + out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k]; + out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k]; + out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k]; + out->informative.misc.time_per_pte_group_vblank_luma[k] = 
mode_lib->mp.time_per_pte_group_vblank_luma[k]; + out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k]; + out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k]; + out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k]; + out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k]; + out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k]; + out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k]; + out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k]; + out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k]; + out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k]; + out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k]; + + out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k]; + out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k]; + out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k]; + out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k]; + out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k]; + out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k]; + out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k]; + } + + // For this DV informative layer, all pipes in the same planes will just use the same id + // will have the optimization and helper layer later on + // only work when we can have high "mcache" that fit everything without thrashing the cache + for (k = 0; k < out->display_config.num_planes; k++) { + out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, k); + out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, k); + + for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) { + out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, k, n); + out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k; + } + + out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, k); + out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, k); + + for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) { + out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = 
dml_get_plane_array_mcache_offsets_plane1(mode_lib, k, n); + out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k; + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h new file mode 100644 index 000000000000..b280ab573fbb --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML2_CORE_DCN4_CALCS_H__ +#define __DML2_CORE_DCN4_CALCS_H__ + +#include "dml2_core_shared_types.h" + +struct dml2_dchub_watermark_regs; +struct dml2_display_arb_regs; +struct dml2_per_stream_programming; +struct dml2_dchub_per_pipe_register_set; +struct core_plane_support_info; +struct core_stream_support_info; +struct dml2_cursor_dlg_regs; +struct display_configuation_with_meta; + +unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params); +bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params); +void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out); +void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out); +void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *dml2_display_cfg, struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_per_pipe_register_set *out, int pipe_index); +void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index); +void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index); +void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_index); +void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_index); +void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out); +void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); +void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); +void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, enum dml2_uclk_pstate_support_method pstate_method, int plane_index); + +void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned int pitch, unsigned int 
GPUVMMinPageSizeKBytes); +void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p); +const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); +const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c new file mode 100644 index 000000000000..1a0da8c6df5a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_core_factory.h" +#include "dml2_core_dcn4.h" +#include "dml2_external_lib_deps.h" + +bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance *out) +{ + bool result = false; + + if (out == 0) + return false; + + memset(out, 0, sizeof(struct dml2_core_instance)); + + switch (project_id) { + case dml2_project_dcn4x_stage1: + result = false; + break; + case dml2_project_dcn4x_stage2: + case dml2_project_dcn4x_stage2_auto_drr_svp: + out->initialize = &core_dcn4_initialize; + out->mode_support = &core_dcn4_mode_support; + out->mode_programming = &core_dcn4_mode_programming; + out->populate_informative = &core_dcn4_populate_informative; + out->calculate_mcache_allocation = &core_dcn4_calculate_mcache_allocation; + result = true; + break; + case dml2_project_invalid: + default: + break; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h new file mode 100644 index 000000000000..53636a8f52aa --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML2_CORE_FACTORY_H__ +#define __DML2_CORE_FACTORY_H__ + +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" + +bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance *out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c new file mode 100644 index 000000000000..0ef77a89d984 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c @@ -0,0 +1,12411 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_internal_shared_types.h" +#include "dml2_core_shared.h" +#include "dml2_debug.h" +#include "lib_float_math.h" + +#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 + +double dml2_core_shared_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) +{ + *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); + return dividend / divisor; + +} + +/* + * START OF STATIC HELPERS + * These static methods are baseline implemenations from DCN4. These should NEVER + * be modified when developing new DCNs. New DCN code should replace the static helpers + * using the function pointer pattern. 
+ */ + +static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only); +static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg); +static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up); +static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info); +static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane); +static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); +static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx); +static void CalculateMaxDETAndMinCompressedBufferSize(unsigned int ConfigReturnBufferSizeInKByte, + unsigned int ConfigReturnBufferSegmentSizeInKByte, + unsigned int ROBBufferSizeInKByte, + unsigned int MaxNumDPP, + unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size + unsigned int nomDETInKByteOverrideValue, // VBA_DELTA + bool is_mrq_present, + + // Output + unsigned int *MaxTotalDETInKByte, + unsigned int *nomDETInKByte, + unsigned int *MinCompressedBufferSizeInKByte); + static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd); +static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode); +static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan); +static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode); +static void CalculateBytePerPixelAndBlockSizes(enum dml2_source_format_class SourcePixelFormat, + enum dml2_swizzle_mode SurfaceTiling, + unsigned int pitch_y, + unsigned int pitch_c, + + // Output + unsigned int *BytePerPixelY, + unsigned int *BytePerPixelC, + double *BytePerPixelDETY, + double *BytePerPixelDETC, + unsigned int *BlockHeight256BytesY, + unsigned int *BlockHeight256BytesC, + unsigned int *BlockWidth256BytesY, + unsigned int *BlockWidth256BytesC, + unsigned int *MacroTileHeightY, + unsigned int *MacroTileHeightC, + unsigned int *MacroTileWidthY, + unsigned int *MacroTileWidthC, + bool *surf_linear128_l, + bool *surf_linear128_c); +static void CalculateSinglePipeDPPCLKAndSCLThroughput( + double HRatio, + double HRatioChroma, + double VRatio, + double VRatioChroma, + double MaxDCHUBToPSCLThroughput, + double MaxPSCLToLBThroughput, + double PixelClock, + enum dml2_source_format_class SourcePixelFormat, + unsigned int HTaps, + unsigned int HTapsChroma, + unsigned int VTaps, + unsigned int VTapsChroma, + + // Output + double *PSCL_THROUGHPUT, + double *PSCL_THROUGHPUT_CHROMA, + double *DPPCLKUsingSingleDPP); +static void CalculateSwathWidth( + const struct dml2_display_cfg *display_cfg, + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + enum dml2_odm_mode ODMMode[], + unsigned int BytePerPixY[], + unsigned int BytePerPixC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + bool surf_linear128_l[], + bool surf_linear128_c[], + unsigned int DPPPerSurface[], + + // Output + unsigned int req_per_swath_ub_l[], + unsigned int req_per_swath_ub_c[], + unsigned int SwathWidthSingleDPPY[], + unsigned int SwathWidthSingleDPPC[], + unsigned int 
SwathWidthY[], // per-pipe + unsigned int SwathWidthC[], // per-pipe + unsigned int MaximumSwathHeightY[], + unsigned int MaximumSwathHeightC[], + unsigned int swath_width_luma_ub[], // per-pipe + unsigned int swath_width_chroma_ub[]); // per-pipe +static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear); +static void CalculateDETBufferSize(struct dml2_core_shared_calculate_det_buffer_size_params *p); +static double CalculateRequiredDispclk(enum dml2_odm_mode ODMMode, double PixelClock); +static double TruncToValidBPP( + struct dml2_core_shared_TruncToValidBPP_locals *l, + double LinkBitRate, + unsigned int Lanes, + unsigned int HTotal, + unsigned int HActive, + double PixelClock, + double DesiredBPP, + bool DSCEnable, + enum dml2_output_encoder_class Output, + enum dml2_output_format_class Format, + unsigned int DSCInputBitPerComponent, + unsigned int DSCSlices, + unsigned int AudioRate, + unsigned int AudioLayout, + enum dml2_odm_mode ODMModeNoDSC, + enum dml2_odm_mode ODMModeDSC, + + // Output + unsigned int *RequiredSlots); +static unsigned int dscceComputeDelay( + unsigned int bpc, + double BPP, + unsigned int sliceWidth, + unsigned int numSlices, + enum dml2_output_format_class pixelFormat, + enum dml2_output_encoder_class Output); +static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output); +static unsigned int CalculateHostVMDynamicLevels(bool GPUVMEnable, bool HostVMEnable, unsigned int HostVMMinPageSize, unsigned int HostVMMaxNonCachedPageTableLevels); +static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p); +static unsigned int CalculatePrefetchSourceLines( + double VRatio, + unsigned int VTaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + enum dml2_rotation_angle RotationAngle, + bool mirrored, + bool ViewportStationary, + unsigned int SwathWidth, + unsigned int ViewportHeight, + unsigned int ViewportXStart, + unsigned int ViewportYStart, + + // Output + unsigned int *VInitPreFill, + unsigned int *MaxNumSwath); +static void CalculateRowBandwidth( + bool GPUVMEnable, + bool use_one_row_for_frame, + enum dml2_source_format_class SourcePixelFormat, + double VRatio, + double VRatioChroma, + bool DCCEnable, + double LineTime, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + + bool mrq_present, + unsigned int meta_row_bytes_per_row_ub_l, + unsigned int meta_row_bytes_per_row_ub_c, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + + // Output + double *dpte_row_bw, + double *meta_row_bw); +static void CalculateMALLUseForStaticScreen( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + unsigned int SurfaceSizeInMALL[], + bool one_row_per_frame_fits_in_buffer[], + + // Output + bool is_using_mall_for_ss[]); +static void CalculateDCCConfiguration( + bool DCCEnabled, + bool DCCProgrammingAssumesScanDirectionUnknown, + enum dml2_source_format_class SourcePixelFormat, + unsigned int SurfaceWidthLuma, + unsigned int SurfaceWidthChroma, + unsigned int SurfaceHeightLuma, + unsigned int SurfaceHeightChroma, + unsigned int nomDETInKByte, + unsigned int RequestHeight256ByteLuma, + unsigned int RequestHeight256ByteChroma, + enum dml2_swizzle_mode TilingFormat, + 
unsigned int BytePerPixelY, + unsigned int BytePerPixelC, + double BytePerPixelDETY, + double BytePerPixelDETC, + enum dml2_rotation_angle RotationAngle, + + // Output + enum dml2_core_internal_request_type *RequestLuma, + enum dml2_core_internal_request_type *RequestChroma, + unsigned int *MaxUncompressedBlockLuma, + unsigned int *MaxUncompressedBlockChroma, + unsigned int *MaxCompressedBlockLuma, + unsigned int *MaxCompressedBlockChroma, + unsigned int *IndependentBlockLuma, + unsigned int *IndependentBlockChroma); +static void calculate_mcache_row_bytes(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_calculate_mcache_row_bytes_params *p); +static void calculate_mcache_setting(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_calculate_mcache_setting_params *p); +static void calculate_mall_bw_overhead_factor( + double mall_prefetch_sdp_overhead_factor[], + double mall_prefetch_dram_overhead_factor[], + + // input + const struct dml2_display_cfg *display_cfg, + unsigned int num_active_planes); +static double dml_get_return_bandwidth_available( + const struct dml2_soc_bb *soc, + enum dml2_core_internal_soc_state_type state_type, + enum dml2_core_internal_bw_type bw_type, + bool is_avg_bw, + bool is_hvm_en, + bool is_hvm_only, + double dcflk_mhz, + double fclk_mhz, + double dram_bw_mbps); +static void calculate_bandwidth_available( + double avg_bandwidth_available_min[dml2_core_internal_soc_state_max], + double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max], + double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max], + + const struct dml2_soc_bb *soc, + bool HostVMEnable, + double dcfclk_mhz, + double fclk_mhz, + double dram_bw_mbps); +static void calculate_avg_bandwidth_required( + double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + + // input + const struct dml2_display_cfg *display_cfg, + unsigned int num_active_planes, + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double cursor_bw[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double mall_prefetch_dram_overhead_factor[], + double mall_prefetch_sdp_overhead_factor[]); +static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculateVMRowAndSwath_params *p); +static double CalculateUrgentLatency( + double UrgentLatencyPixelDataOnly, + double UrgentLatencyPixelMixedWithVMData, + double UrgentLatencyVMDataOnly, + bool DoUrgentLatencyAdjustment, + double UrgentLatencyAdjustmentFabricClockComponent, + double UrgentLatencyAdjustmentFabricClockReference, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int urgent_ramp_uclk_cycles, + unsigned int df_qos_response_time_fclk_cycles, + unsigned int max_round_trip_to_furthest_cs_fclk_cycles, + unsigned int mall_overhead_fclk_cycles, + double umc_urgent_ramp_latency_margin, + double fabric_max_transport_latency_margin); +static double CalculateTripToMemory( + double UrgLatency, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int trip_to_memory_uclk_cycles, + unsigned int 
max_round_trip_to_furthest_cs_fclk_cycles, + unsigned int mall_overhead_fclk_cycles, + double umc_max_latency_margin, + double fabric_max_transport_latency_margin); +static double CalculateMetaTripToMemory( + double UrgLatency, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int meta_trip_to_memory_uclk_cycles, + unsigned int meta_trip_to_memory_fclk_cycles, + double umc_max_latency_margin, + double fabric_max_transport_latency_margin); +static void calculate_cursor_req_attributes( + unsigned int cursor_width, + unsigned int cursor_bpp, + + // output + unsigned int *cursor_lines_per_chunk, + unsigned int *cursor_bytes_per_line, + unsigned int *cursor_bytes_per_chunk, + unsigned int *cursor_bytes); +static void calculate_cursor_urgent_burst_factor( + unsigned int CursorBufferSize, + unsigned int CursorWidth, + unsigned int cursor_bytes_per_chunk, + unsigned int cursor_lines_per_chunk, + double LineTime, + double UrgentLatency, + + double *UrgentBurstFactorCursor, + bool *NotEnoughUrgentLatencyHiding); +static void CalculateUrgentBurstFactor( + const struct dml2_plane_parameters *plane_cfg, + unsigned int swath_width_luma_ub, + unsigned int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double LineTime, + double UrgentLatency, + double VRatio, + double VRatioC, + double BytePerPixelInDETY, + double BytePerPixelInDETC, + unsigned int DETBufferSizeY, + unsigned int DETBufferSizeC, + // Output + double *UrgentBurstFactorLuma, + double *UrgentBurstFactorChroma, + bool *NotEnoughUrgentLatencyHiding); +static void CalculateDCFCLKDeepSleep( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + unsigned int DPPPerSurface[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int ReturnBusWidth, + + // Output + double *DCFClkDeepSleep); +static double CalculateWriteBackDelay( + enum dml2_source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackVTaps, + unsigned int WritebackDestinationWidth, + unsigned int WritebackDestinationHeight, + unsigned int WritebackSourceHeight, + unsigned int HTotal); +static unsigned int CalculateMaxVStartup( + bool ptoi_supported, + unsigned int vblank_nom_default_us, + const struct dml2_timing_cfg *timing, + double write_back_delay_us); +static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p); +static void CalculateODMMode( + unsigned int MaximumPixelsPerLinePerDSCUnit, + unsigned int HActive, + enum dml2_output_format_class OutFormat, + enum dml2_output_encoder_class Output, + enum dml2_odm_mode ODMUse, + double MaxDispclk, + bool DSCEnable, + unsigned int TotalNumberOfActiveDPP, + unsigned int MaxNumDPP, + double PixelClock, + + // Output + bool *TotalAvailablePipesSupport, + unsigned int *NumberOfDPP, + enum dml2_odm_mode *ODMMode, + double *RequiredDISPCLKPerSurface); +static void CalculateOutputLink( + struct dml2_core_internal_scratch *s, + double PHYCLK, + double PHYCLKD18, + double PHYCLKD32, + double Downspreading, + bool IsMainSurfaceUsingTheIndicatedTiming, + enum dml2_output_encoder_class Output, + enum dml2_output_format_class OutputFormat, + unsigned int HTotal, 
+ unsigned int HActive, + double PixelClockBackEnd, + double ForcedOutputLinkBPP, + unsigned int DSCInputBitPerComponent, + unsigned int NumberOfDSCSlices, + double AudioSampleRate, + unsigned int AudioSampleLayout, + enum dml2_odm_mode ODMModeNoDSC, + enum dml2_odm_mode ODMModeDSC, + enum dml2_dsc_enable_option DSCEnable, + unsigned int OutputLinkDPLanes, + enum dml2_output_link_dp_rate OutputLinkDPRate, + + // Output + bool *RequiresDSC, + bool *RequiresFEC, + double *OutBpp, + enum dml2_core_internal_output_type *OutputType, + enum dml2_core_internal_output_type_rate *OutputRate, + unsigned int *RequiredSlots); +static double CalculateWriteBackDISPCLK( + enum dml2_source_format_class WritebackPixelFormat, + double PixelClock, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackHTaps, + unsigned int WritebackVTaps, + unsigned int WritebackSourceWidth, + unsigned int WritebackDestinationWidth, + unsigned int HTotal, + unsigned int WritebackLineBufferSize); +static double RequiredDTBCLK( + bool DSCEnable, + double PixelClock, + enum dml2_output_format_class OutputFormat, + double OutputBpp, + unsigned int DSCSlices, + unsigned int HTotal, + unsigned int HActive, + unsigned int AudioRate, + unsigned int AudioLayout); +static unsigned int DSCDelayRequirement( + bool DSCEnabled, + enum dml2_odm_mode ODMMode, + unsigned int DSCInputBitPerComponent, + double OutputBpp, + unsigned int HActive, + unsigned int HTotal, + unsigned int NumberOfDSCSlices, + enum dml2_output_format_class OutputFormat, + enum dml2_output_encoder_class Output, + double PixelClock, + double PixelClockBackEnd); +static void CalculateSurfaceSizeInMall( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + unsigned int BytesPerPixelY[], + unsigned int BytesPerPixelC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int ReadBlockWidthY[], + unsigned int ReadBlockWidthC[], + unsigned int ReadBlockHeightY[], + unsigned int ReadBlockHeightC[], + + // Output + unsigned int SurfaceSizeInMALL[], + bool *ExceededMALLSize); +static void calculate_tdlut_setting(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_calculate_tdlut_setting_params *p); +static void CalculateTarb( + const struct dml2_display_cfg *display_cfg, + unsigned int PixelChunkSizeInKByte, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + unsigned int tdlut_bytes_per_group[], + double HostVMInefficiencyFactor, + double HostVMInefficiencyFactorPrefetch, + unsigned int HostVMMinPageSize, + double ReturnBW, + + unsigned int MetaChunkSize, + + // output + double *Tarb, + double *Tarb_prefetch); +static double CalculateTWait(long reserved_vblank_time_ns, double UrgentLatency, double Ttrip); +static void CalculateVUpdateAndDynamicMetadataParameters( + unsigned int MaxInterDCNTileRepeaters, + double Dppclk, + double Dispclk, + double DCFClkDeepSleep, + double PixelClock, + unsigned int HTotal, + unsigned int VBlank, + unsigned int DynamicMetadataTransmittedBytes, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + + // Output + double *TSetup, + double *Tdmbf, + double *Tdmec, + double *Tdmsks, + unsigned int *VUpdateOffsetPix, + unsigned int *VUpdateWidthPix, + unsigned int *VReadyOffsetPix); +static 
double get_urgent_bandwidth_required( + struct dml2_core_shared_get_urgent_bandwidth_required_locals *l, + const struct dml2_display_cfg *display_cfg, + enum dml2_core_internal_soc_state_type state_type, + enum dml2_core_internal_bw_type bw_type, + bool inc_flip_bw, // including flip bw + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double dcc_dram_bw_pref_overhead_factor_p0[], + double dcc_dram_bw_pref_overhead_factor_p1[], + double mall_prefetch_sdp_overhead_factor[], + double mall_prefetch_dram_overhead_factor[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double dpte_row_bw[], + double meta_row_bw[], + double prefetch_cursor_bw[], + double prefetch_vmrow_bw[], + double flip_bw[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[]); +static void CalculateExtraLatency( + const struct dml2_display_cfg *display_cfg, + unsigned int ROBBufferSizeInKByte, + unsigned int RoundTripPingLatencyCycles, + unsigned int ReorderingBytes, + double DCFCLK, + double FabricClock, + unsigned int PixelChunkSizeInKByte, + double ReturnBW, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + unsigned int tdlut_bytes_per_group[], + double HostVMInefficiencyFactor, + double HostVMInefficiencyFactorPrefetch, + unsigned int HostVMMinPageSize, + enum dml2_qos_param_type qos_type, + bool max_oustanding_when_urgent_expected, + unsigned int max_outstanding_requests, + unsigned int request_size_bytes_luma[], + unsigned int request_size_bytes_chroma[], + unsigned int MetaChunkSize, + unsigned int dchub_arb_to_ret_delay, + double Ttrip, + unsigned int hostvm_mode, + + // output + double *ExtraLatency, // Tex + double *ExtraLatency_sr, // Tex_sr + double *ExtraLatencyPrefetch); +static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p); +static void calculate_peak_bandwidth_required( + struct dml2_core_internal_scratch *s, + + // output + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + + // input + const struct dml2_display_cfg *display_cfg, + unsigned int inc_flip_bw, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double dcc_dram_bw_pref_overhead_factor_p0[], + double dcc_dram_bw_pref_overhead_factor_p1[], + double mall_prefetch_sdp_overhead_factor[], + double mall_prefetch_dram_overhead_factor[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double dpte_row_bw[], + double meta_row_bw[], + double prefetch_cursor_bw[], + double prefetch_vmrow_bw[], + double flip_bw[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double 
UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[]); +static void check_urgent_bandwidth_support( + double *frac_urg_bandwidth_nom, + double *frac_urg_bandwidth_mall, + bool *vactive_bandwidth_support_ok, // vactive ok + bool *bandwidth_support_ok, // max of vm, prefetch, vactive all ok + + unsigned int mall_allocated_for_dcn_mbytes, + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]); +static double get_bandwidth_available_for_immediate_flip( + enum dml2_core_internal_soc_state_type eval_state, + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]); +static void calculate_immediate_flip_bandwidth_support( + // Output + double *frac_urg_bandwidth_flip, + bool *flip_bandwidth_support_ok, + + // Input + enum dml2_core_internal_soc_state_type eval_state, + double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]); +static void CalculateFlipSchedule( + struct dml2_core_internal_scratch *s, + bool iflip_enable, + bool use_lb_flip_bw, + double HostVMInefficiencyFactor, + double Tvm_trips_flip, + double Tr0_trips_flip, + double Tvm_trips_flip_rounded, + double Tr0_trips_flip_rounded, + bool GPUVMEnable, + double vm_bytes, // vm_bytes + double DPTEBytesPerRow, // dpte_row_bytes + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum dml2_source_format_class SourcePixelFormat, + double LineTime, + double VRatio, + double VRatioChroma, + double Tno_bw_flip, + unsigned int dpte_row_height, + unsigned int dpte_row_height_chroma, + bool use_one_row_for_frame_flip, + unsigned int max_flip_time_us, + unsigned int per_pipe_flip_bytes, + unsigned int meta_row_bytes, + unsigned int meta_row_height, + unsigned int meta_row_height_chroma, + bool dcc_mrq_enable, + + // Output + double *dst_y_per_vm_flip, + double *dst_y_per_row_flip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe); +static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p); +static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config); +static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config); +static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params); +static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table); +static unsigned int get_pipe_flip_bytes( + double hostvm_inefficiency_factor, + unsigned int vm_bytes, + unsigned int dpte_row_bytes, + unsigned int meta_row_bytes); +static void calculate_hostvm_inefficiency_factor( + double *HostVMInefficiencyFactor, + double 
*HostVMInefficiencyFactorPrefetch, + + bool gpuvm_enable, + bool hostvm_enable, + unsigned int remote_iommu_outstanding_translations, + unsigned int max_outstanding_reqs, + double urg_bandwidth_avail_active_pixel_and_vm, + double urg_bandwidth_avail_active_vm_only); +static void CalculatePixelDeliveryTimes( + const struct dml2_display_cfg *display_cfg, + const struct core_display_cfg_support_info *cfg_support_info, + unsigned int NumberOfActiveSurfaces, + double VRatioPrefetchY[], + double VRatioPrefetchC[], + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + unsigned int BytePerPixelC[], + unsigned int req_per_swath_ub_l[], + unsigned int req_per_swath_ub_c[], + + // Output + double DisplayPipeLineDeliveryTimeLuma[], + double DisplayPipeLineDeliveryTimeChroma[], + double DisplayPipeLineDeliveryTimeLumaPrefetch[], + double DisplayPipeLineDeliveryTimeChromaPrefetch[], + double DisplayPipeRequestDeliveryTimeLuma[], + double DisplayPipeRequestDeliveryTimeChroma[], + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], + double DisplayPipeRequestDeliveryTimeChromaPrefetch[]); +static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p); +static void CalculateVMGroupAndRequestTimes( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelC[], + double dst_y_per_vm_vblank[], + double dst_y_per_vm_flip[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + unsigned int vm_group_bytes[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + unsigned int tdlut_pte_bytes_per_frame[], + unsigned int meta_pte_bytes_per_frame_ub_l[], + unsigned int meta_pte_bytes_per_frame_ub_c[], + bool mrq_present, + + // Output + double TimePerVMGroupVBlank[], + double TimePerVMGroupFlip[], + double TimePerVMRequestVBlank[], + double TimePerVMRequestFlip[]); +static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculateStutterEfficiency_params *p); +static bool dml_is_dual_plane(enum dml2_source_format_class source_format); +static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx); +static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs); +static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend); +static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_display_mode_lib *mode_lib, + unsigned int pipe_idx); +static void rq_dlg_get_dlg_reg(struct dml2_core_internal_scratch *s, + struct dml2_display_dlg_regs *disp_dlg_regs, + struct dml2_display_ttu_regs *disp_ttu_regs, + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_display_mode_lib *mode_lib, + const unsigned int pipe_idx); +static void rq_dlg_get_arb_params(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param); + +/* + * END OF STATIC HELPERS + */ + +bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) +{ + struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; + const struct dml2_display_cfg 
*display_cfg = in_out_params->in_display_cfg; + const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; + + struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; + struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; + unsigned int k, m, n; + + memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support)); + + mode_lib->ms.num_active_planes = display_cfg->num_planes; + get_stream_output_bpp(s->OutputBpp, display_cfg); + + mode_lib->ms.state_idx = in_out_params->min_clk_index; + mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000); + mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000); + mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000); + mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000; + mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000; + mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dispclk / 1000; + mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000; + mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000; + mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); + mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); + mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); + mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); + +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: --- START --- \n", __func__); + dml2_printf("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); + dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + dml2_printf("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index); + dml2_printf("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK); + dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps); + dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); + dml2_printf("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); + 
dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + dml2_printf("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK); + dml2_printf("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz); + dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); + dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); + dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); + dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); + dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); + dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + + // dml2_printf_dml_policy(&mode_lib->ms.policy); + // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, mode_lib->ms.num_active_planes); + // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, mode_lib->ms.num_active_planes); + // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, mode_lib->ms.num_active_planes); + // dml2_printf_dml_display_cfg_output(&display_cfg->output, mode_lib->ms.num_active_planes); +#endif + + CalculateMaxDETAndMinCompressedBufferSize( + mode_lib->ip.config_return_buffer_size_in_kbytes, + mode_lib->ip.config_return_buffer_segment_size_in_kbytes, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->ip.max_num_dpp, + display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, + display_cfg->overrides.hw.force_nom_det_size_kbytes.value, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + &mode_lib->ms.MaxTotalDETInKByte, + &mode_lib->ms.NomDETInKByte, + &mode_lib->ms.MinCompressedBufferSizeInKByte); + + PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + mode_lib->ms.support.ScaleRatioAndTapsSupport = true; + // Many core tests are still setting scaling parameters "incorrectly" + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false + && (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format) + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) { + mode_lib->ms.support.ScaleRatioAndTapsSupport = false; + } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0 + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0 + || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1) + || 
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps + || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + || (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format) + && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 || + (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps || + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) { + mode_lib->ms.support.ScaleRatioAndTapsSupport = false; + } + } + /*Source Format, Pixel Format and Scan Support Check*/ + mode_lib->ms.support.SourceFormatPixelAndScanSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + mode_lib->ms.support.SourceFormatPixelAndScanSupport = false; + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateBytePerPixelAndBlockSizes( + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].surface.tiling, + display_cfg->plane_descriptors[k].surface.plane0.pitch, + display_cfg->plane_descriptors[k].surface.plane1.pitch, + + /* Output */ + &mode_lib->ms.BytePerPixelY[k], + &mode_lib->ms.BytePerPixelC[k], + &mode_lib->ms.BytePerPixelInDETY[k], + &mode_lib->ms.BytePerPixelInDETC[k], + &mode_lib->ms.Read256BlockHeightY[k], + &mode_lib->ms.Read256BlockHeightC[k], + &mode_lib->ms.Read256BlockWidthY[k], + &mode_lib->ms.Read256BlockWidthC[k], + &mode_lib->ms.MacroTileHeightY[k], + &mode_lib->ms.MacroTileHeightC[k], + &mode_lib->ms.MacroTileWidthY[k], + &mode_lib->ms.MacroTileWidthC[k], + &mode_lib->ms.surf_linear128_l[k], + &mode_lib->ms.surf_linear128_c[k]); + } + + /*Bandwidth Support Check*/ + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; + } else { + mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + mode_lib->ms.SwathWidthCSingleDPP[k] = 
display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.SurfaceReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + mode_lib->ms.SurfaceReadBandwidthChroma[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + + mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * + display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); + +#ifdef __DML_VBA_DEBUG__ + double old_ReadBandwidthLuma = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + double old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0; + dml2_printf("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma); + dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma); + dml2_printf("DML::%s: k=%u, ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthLuma[k]); + dml2_printf("DML::%s: k=%u, ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthChroma[k]); +#endif + } + + // Writeback bandwidth + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) { + mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width + / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height + * 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total + / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0; + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width + / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total + / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0; + } else { + mode_lib->ms.WriteBandwidth[k] = 0.0; + } + } + + /*Writeback Latency support check*/ + mode_lib->ms.support.WritebackLatencySupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && + (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { + mode_lib->ms.support.WritebackLatencySupport = false; + } + } + + /* Writeback Mode Support Check */ + s->TotalNumberOfActiveWriteback = 0; + for (k = 0; k <= (unsigned int)mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true + && (display_cfg->plane_descriptors[k].stream_index == k)) { + s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1; + } + } + + mode_lib->ms.support.EnoughWritebackUnits = 1; + if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) { + mode_lib->ms.support.EnoughWritebackUnits = false; + } + + /* Writeback Scale Ratio and Taps Support Check */ + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > mode_lib->ip.writeback_max_hscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > mode_lib->ip.writeback_max_vscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio < mode_lib->ip.writeback_min_hscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio < mode_lib->ip.writeback_min_vscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > (unsigned 
int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps + || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps % 2) == 1))) { + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) { + mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; + } + } + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateSinglePipeDPPCLKAndSCLThroughput( + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ip.max_dchub_pscl_bw_pix_per_clk, + mode_lib->ip.max_pscl_lb_bw_pix_per_clk, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps, + /* Output */ + &mode_lib->ms.PSCL_FACTOR[k], + &mode_lib->ms.PSCL_FACTOR_CHROMA[k], + &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]); + } + + // Max Viewport Size support + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { + s->MaximumSwathWidthSupportLuma = 15360; + } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video + s->MaximumSwathWidthSupportLuma = 7680 + 16; + } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video + s->MaximumSwathWidthSupportLuma = 4320 + 16; + } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha + s->MaximumSwathWidthSupportLuma = 5120 + 16; + } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp + s->MaximumSwathWidthSupportLuma = 3072 + 16; + } else { + s->MaximumSwathWidthSupportLuma = 6144 + 16; + } + + if (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format)) { + s->MaximumSwathWidthSupportChroma = (unsigned 
int)(s->MaximumSwathWidthSupportLuma / 2.0); + } else { + s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma; + } + mode_lib->ms.MaximumSwathWidthInLineBufferLuma = mode_lib->ip.line_buffer_size_bits * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0)); + if (mode_lib->ms.BytePerPixelC[k] == 0.0) { + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; + } else { + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = mode_lib->ip.line_buffer_size_bits * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0)); + } + mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); + mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); + } + + /* Cursor Support Check */ + mode_lib->ms.support.CursorSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].cursor.cursor_width > 0.0) { + if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) { + mode_lib->ms.support.CursorSupport = false; + } + } + } + + /* Valid Pitch Check */ + mode_lib->ms.support.PitchSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + + // data pitch + unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k]; + + if (mode_lib->ms.surf_linear128_l[k]) + alignment_l = alignment_l / 2; + + mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l); + if (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k]; + + if (mode_lib->ms.surf_linear128_c[k]) + alignment_c = alignment_c / 2; + mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c); + } else { + mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch; + } + + if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch || + mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) { + mode_lib->ms.support.PitchSupport = false; +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]); + dml2_printf("DML::%s: k=%u PitchY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch); + dml2_printf("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]); + dml2_printf("DML::%s: k=%u PitchC = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch); + 
dml2_printf("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport); +#endif + } + + // meta pitch + if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) { + mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch, + display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]); + + if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch) + mode_lib->ms.support.PitchSupport = false; + + if (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch, + display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]); + + if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch) + mode_lib->ms.support.PitchSupport = false; + } + } else { + mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0; + mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0; + } + } + + mode_lib->ms.support.ViewportExceedsSurface = false; + if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { + mode_lib->ms.support.ViewportExceedsSurface = true; +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + dml2_printf("DML::%s: k=%u SurfaceWidthY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width); + dml2_printf("DML::%s: k=%u ViewportHeight = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); + dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); +#endif + if (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || + display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { + mode_lib->ms.support.ViewportExceedsSurface = true; + } + } + } + } + } + + CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte; + CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + 
CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1; + CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.SurfaceReadBandwidthLuma; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.SurfaceReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC; + CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l; + CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c; + CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode; + CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY; + CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC; + CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2]; + CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + // output + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0]; + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1]; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3]; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[4]; + CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5]; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6]; + CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7]; + CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8]; + CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26]; + CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9]; + 
CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10]; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11]; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c; + CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0]; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1]; + CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2]; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1]; + CalculateSwathAndDETConfiguration_params->funcs = &mode_lib->funcs; + + // This calls is just to find out if there is enough DET space to support full vp in 1 pipe. + CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); + + { + mode_lib->ms.TotalNumberOfActiveDPP = 0; + mode_lib->ms.support.TotalAvailablePipesSupport = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + CalculateODMMode( + mode_lib->ip.maximum_pixels_per_line_per_dsc_unit, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode, + mode_lib->ms.max_dispclk_freq_mhz, + false, // DSCEnable + mode_lib->ms.TotalNumberOfActiveDPP, + mode_lib->ip.max_num_dpp, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + + /* Output */ + &s->TotalAvailablePipesSupportNoDSC, + &s->NumberOfDPPNoDSC, + &s->ODMModeNoDSC, + &s->RequiredDISPCLKPerSurfaceNoDSC); + + CalculateODMMode( + mode_lib->ip.maximum_pixels_per_line_per_dsc_unit, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode, + mode_lib->ms.max_dispclk_freq_mhz, + true, // DSCEnable + mode_lib->ms.TotalNumberOfActiveDPP, + mode_lib->ip.max_num_dpp, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + + /* Output */ + &s->TotalAvailablePipesSupportDSC, + &s->NumberOfDPPDSC, + &s->ODMModeDSC, + &s->RequiredDISPCLKPerSurfaceDSC); + + /*Number Of DSC Slices*/ + if (display_cfg->plane_descriptors[k].stream_index == k) { + if (s->PixelClockBackEnd[k] > 4800) { + mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4)); + } else if (s->PixelClockBackEnd[k] > 2400) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 8; + } else if (s->PixelClockBackEnd[k] > 1200) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 4; + } else if 
(s->PixelClockBackEnd[k] > 340) { + mode_lib->ms.support.NumberOfDSCSlices[k] = 2; + } else { + mode_lib->ms.support.NumberOfDSCSlices[k] = 1; + } + } else { + mode_lib->ms.support.NumberOfDSCSlices[k] = 0; + } + + if (s->ODMModeDSC == dml2_odm_mode_combine_2to1) + mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0); + else if (s->ODMModeDSC == dml2_odm_mode_combine_3to1) + mode_lib->ms.support.NumberOfDSCSlices[k] = 12; + else if (s->ODMModeDSC == dml2_odm_mode_combine_4to1) + mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0); + + CalculateOutputLink( + &mode_lib->scratch, + ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000), + ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000), + ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000), + mode_lib->soc.phy_downspread_percent, + (display_cfg->plane_descriptors[k].stream_index == k), + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + s->PixelClockBackEnd[k], + s->OutputBpp[k], + mode_lib->ip.maximum_dsc_bits_per_component, + mode_lib->ms.support.NumberOfDSCSlices[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout, + s->ODMModeNoDSC, + s->ODMModeDSC, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate, + + /* Output */ + &mode_lib->ms.RequiresDSC[k], + &mode_lib->ms.RequiresFEC[k], + &mode_lib->ms.OutputBpp[k], + &mode_lib->ms.OutputType[k], // VBA_DELTA, VBA uses a string to represent type and rate, but DML uses enum, don't want to rely on strng + &mode_lib->ms.OutputRate[k], + &mode_lib->ms.RequiredSlots[k]); + + if (mode_lib->ms.RequiresDSC[k] == false) { + mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC; + mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC; + if (!s->TotalAvailablePipesSupportNoDSC) + mode_lib->ms.support.TotalAvailablePipesSupport = false; + mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC; + } else { + mode_lib->ms.ODMMode[k] = s->ODMModeDSC; + mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC; + if (!s->TotalAvailablePipesSupportDSC) + mode_lib->ms.support.TotalAvailablePipesSupport = false; + mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC; + } + dml2_printf("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]); + dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); + } + + // FIXME_DCN4 - add odm vs mpc use check + + // FIXME_DCN4 - add imall cap check + mode_lib->ms.support.incorrect_imall_usage = 0; + for (k = 0; k < 
mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) + mode_lib->ms.support.incorrect_imall_usage = 1; + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 1; + + if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 4; + } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 3; + } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 2; + } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) { + mode_lib->ms.MPCCombine[k] = true; + mode_lib->ms.NoOfDPP[k] = 2; + mode_lib->ms.TotalNumberOfActiveDPP++; + } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) { + mode_lib->ms.MPCCombine[k] = false; + mode_lib->ms.NoOfDPP[k] = 1; + if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { + dml2_printf("ERROR: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__); + } + } else { + if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { + mode_lib->ms.MPCCombine[k] = true; + mode_lib->ms.NoOfDPP[k] = 2; + mode_lib->ms.TotalNumberOfActiveDPP++; + } + } +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]); +#endif + } + + if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp) + mode_lib->ms.support.TotalAvailablePipesSupport = false; + + + mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0; + for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NoOfDPP[k] == 1) + mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1; + } + + //DISPCLK/DPPCLK + mode_lib->ms.WritebackRequiredDISPCLK = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable) { + mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK, + CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + mode_lib->ip.writeback_line_buffer_buffer_size)); + } + } + 
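+ /* Final DISPCLK/DPPCLK requirement: RequiredDISPCLK is the max of the writeback DISPCLK and every surface's per-surface DISPCLK; each plane's RequiredDPPCLK is its single-DPP minimum divided by its DPP count (e.g. a 1200 MHz single-DPP requirement split across two DPPs needs 600 MHz per DPP), and DISPCLK_DPPCLK_Support fails if either exceeds the SoC maximum. */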
+ mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]); + } + + mode_lib->ms.GlobalDPPCLK = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k]; + mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]); + } + + mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz)); + } + + /* Total Available OTG, HDMIFRL, DP Support Check */ + s->TotalNumberOfActiveOTG = 0; + s->TotalNumberOfActiveHDMIFRL = 0; + s->TotalNumberOfActiveDP2p0 = 0; + s->TotalNumberOfActiveDP2p0Outputs = 0; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].stream_index == k) { + s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) { + s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1; + // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup + //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) { + s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1; + //} + } + } + } + + mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg); + mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs); + + mode_lib->ms.support.ExceededMultistreamSlots = false; + mode_lib->ms.support.LinkCapacitySupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false && + display_cfg->plane_descriptors[k].stream_index == k && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) { + mode_lib->ms.support.LinkCapacitySupport = false; + } + } + + mode_lib->ms.support.P2IWith420 = false; + mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false; + mode_lib->ms.support.DSC422NativeNotSupported = false; + mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false; + mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false; + mode_lib->ms.support.BPPForMultistreamNotIndicated = false; + mode_lib->ms.support.MultistreamWithHDMIOreDP = false; + mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false; + mode_lib->ms.support.NotEnoughLanesForMSO = false; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) 
{ + if (display_cfg->plane_descriptors[k].stream_index == k && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true) + mode_lib->ms.support.P2IWith420 = true; + + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary && s->OutputBpp[k] != 0) + mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true; + if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support) + mode_lib->ms.support.DSC422NativeNotSupported = true; + + if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) && + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) || + ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) && + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0)) + mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true; + + // FIXME_STAGE2 + //if (display_cfg->output.OutputMultistreamEn[k] == 1) { + // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na) + // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true; + // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0) + // mode_lib->ms.support.BPPForMultistreamNotIndicated = true; + // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) { + // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0) + // 
mode_lib->ms.support.BPPForMultistreamNotIndicated = true; + // } + //} + + if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) { + // FIXME_STAGE2 + //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k) + // mode_lib->ms.support.MultistreamWithHDMIOreDP = true; + //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) { + // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n) + // mode_lib->ms.support.MultistreamWithHDMIOreDP = true; + //} + } + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4)) + mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true; + + if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) || + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4)) + mode_lib->ms.support.NotEnoughLanesForMSO = true; + } + } + + mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].stream_index == k && + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl && + RequiredDTBCLK( + mode_lib->ms.RequiresDSC[k], + s->PixelClockBackEnd[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + mode_lib->ms.OutputBpp[k], + mode_lib->ms.support.NumberOfDSCSlices[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout) > ((double)mode_lib->soc.clk_table.dtbclk.clk_values_khz[0] / 1000)) { + mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true; + } + } + + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].stream_index == k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) { + s->DSCFormatFactor = 2; + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) { + s->DSCFormatFactor = 1; + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) { + s->DSCFormatFactor = 2; + } else { + s->DSCFormatFactor = 1; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); +#endif + if (mode_lib->ms.RequiresDSC[k] == true) { + s->PixelClockBackEndFactor = 3.0; + + if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) + s->PixelClockBackEndFactor = 12.0; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) + s->PixelClockBackEndFactor = 9.0; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) + s->PixelClockBackEndFactor = 6.0; + + mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor; + if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) { + mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); + dml2_printf("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]); + dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); + dml2_printf("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); +#endif + } + } + } + } + + /* Check DSC Unit and Slices Support */ + mode_lib->ms.support.NotEnoughDSCSlices = false; + s->TotalDSCUnitsRequired = 0; + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.RequiresDSC[k] == true) { + s->NumDSCUnitRequired = 1; + + if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) + s->NumDSCUnitRequired = 4; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) + s->NumDSCUnitRequired = 3; + else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) + s->NumDSCUnitRequired = 2; + + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit) + mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false; + s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired; + if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired) + mode_lib->ms.support.NotEnoughDSCSlices = true; + } + } + + mode_lib->ms.support.NotEnoughDSCUnits = false; + if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) { + mode_lib->ms.support.NotEnoughDSCUnits = true; + } + + /*DSC Delay per state*/ + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.DSCDelay[k] = 
DSCDelayRequirement(mode_lib->ms.RequiresDSC[k], + mode_lib->ms.ODMMode[k], + mode_lib->ip.maximum_dsc_bits_per_component, + mode_lib->ms.OutputBpp[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + mode_lib->ms.support.NumberOfDSCSlices[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + s->PixelClockBackEnd[k]); + } + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + for (m = 0; m < mode_lib->ms.num_active_planes; m++) { + if (display_cfg->plane_descriptors[k].stream_index == m && mode_lib->ms.RequiresDSC[m] == true) { + mode_lib->ms.DSCDelay[k] = mode_lib->ms.DSCDelay[m]; + } + } + } + + // Figure out the swath and DET configuration after the num dpp per plane is figured out + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; + CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP; + + // output + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0]; + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1]; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub; + CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC; + CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY; + CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC; + CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma; + CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; + CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3]; + CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1]; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport; + CalculateSwathAndDETConfiguration_params->funcs = &mode_lib->funcs; + + CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); + + if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) { + for (k = 0; 
k < mode_lib->ms.num_active_planes; k++) + mode_lib->ms.SurfaceSizeInMALL[k] = 0; + mode_lib->ms.support.ExceededMALLSize = 0; + } else { + CalculateSurfaceSizeInMall( + display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->soc.mall_allocated_for_dcn_mbytes, + + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.Read256BlockWidthY, + mode_lib->ms.Read256BlockWidthC, + mode_lib->ms.Read256BlockHeightY, + mode_lib->ms.Read256BlockHeightC, + mode_lib->ms.MacroTileWidthY, + mode_lib->ms.MacroTileWidthC, + mode_lib->ms.MacroTileHeightY, + mode_lib->ms.MacroTileHeightC, + + /* Output */ + mode_lib->ms.SurfaceSizeInMALL, + &mode_lib->ms.support.ExceededMALLSize); + } + + mode_lib->ms.TotalNumberOfDCCActiveDPP = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) { + mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k]; + } + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k]; + s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k]; + s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k]; + s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k]; + s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k]; + s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling; + s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch; + s->SurfParameters[k].PitchC = 
display_cfg->plane_descriptors[k].surface.plane1.pitch;
+ s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+ s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+ s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+ s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
+ s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
+ s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k];
+ s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k];
+
+ s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
+ s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
+ }
+
+ CalculateVMRowAndSwath_params->display_cfg = display_cfg;
+ CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
+ CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
+ CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
+ CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY;
+ CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC;
+ CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
+ CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ // output
+ CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded;
+ CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12];
+ CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13];
+ CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
+ CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA
+ CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16];
+ CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17];
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18];
+ CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19];
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20];
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21];
+ CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22];
+ CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
+ CalculateVMRowAndSwath_params->vmpg_height_y =
s->vmpg_height_y; + CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c; + CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23]; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24]; + CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY; + CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC; + CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY; + CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC; + CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY; + CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC; + CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow; + CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes; + CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame; + CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip; + CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0]; + CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1]; + CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25]; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded; + CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw; + CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes; + CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26]; + CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27]; + CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28]; + CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29]; + CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30]; + CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31]; + CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32]; + CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33]; + + CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params); + + mode_lib->ms.support.PTEBufferSizeNotExceeded = true; + mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false) + mode_lib->ms.support.PTEBufferSizeNotExceeded = false; + + if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false) + mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]); + dml2_printf("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]); +#endif + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded); + dml2_printf("DML::%s: DCCMetaBufferSizeNotExceeded = 
%u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded); +#endif + + mode_lib->ms.UrgLatency = CalculateUrgentLatency( + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.do_urgent_latency_adjustment, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->ms.FabricClock, + mode_lib->ms.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + + mode_lib->ms.TripToMemory = CalculateTripToMemory( + mode_lib->ms.UrgLatency, + mode_lib->ms.FabricClock, + mode_lib->ms.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + + mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory); + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, + + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); + + bool cursor_not_enough_urgent_latency_hiding = 0; + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->ms.UrgLatency, + + // output + &mode_lib->ms.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k); + dml2_printf("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + dml2_printf("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); +#endif + + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->ms.swath_width_luma_ub[k], + 
mode_lib->ms.swath_width_chroma_ub[k], + mode_lib->ms.SwathHeightY[k], + mode_lib->ms.SwathHeightC[k], + line_time_us, + mode_lib->ms.UrgLatency, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeY[k], + mode_lib->ms.DETBufferSizeC[k], + + // Output + &mode_lib->ms.UrgentBurstFactorLuma[k], + &mode_lib->ms.UrgentBurstFactorChroma[k], + &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]); + + mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding; + } + + CalculateDCFCLKDeepSleep( + display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.SwathWidthY, + mode_lib->ms.SwathWidthC, + mode_lib->ms.NoOfDPP, + mode_lib->ms.PSCL_FACTOR, + mode_lib->ms.PSCL_FACTOR_CHROMA, + mode_lib->ms.RequiredDPPCLK, + mode_lib->ms.SurfaceReadBandwidthLuma, + mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->soc.return_bus_width_bytes, + + /* Output */ + &mode_lib->ms.dcfclk_deepsleep); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].stream_index == k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay( + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK; + } else { + mode_lib->ms.WritebackDelayTime[k] = 0.0; + } + for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) { + if (display_cfg->plane_descriptors[m].stream_index == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.enable == true) { + mode_lib->ms.WritebackDelayTime[k] = math_max2(mode_lib->ms.WritebackDelayTime[k], + mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay( + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.output_width, + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[m].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK); + } + } + } + } + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) { + if (display_cfg->plane_descriptors[k].stream_index == m) { + mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.WritebackDelayTime[m]; + } + } + } + + // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported); + s->MaximumVStartup[k] = CalculateMaxVStartup( + mode_lib->ip.ptoi_supported, + mode_lib->ip.vblank_nom_default_us, + &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing, + mode_lib->ms.WritebackDelayTime[k]); + mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]); +#endif + + /* Immediate Flip and MALL parameters */ + s->ImmediateFlipRequired = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip; + } + + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = + mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || + ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) && + (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))); + } + + mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen || + ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) || + ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)); + } + + s->FullFrameMALLPStateMethod = false; + s->SubViewportMALLPStateMethod = false; + s->PhantomPipeMALLPStateMethod = false; + s->SubViewportMALLRefreshGreaterThan120Hz = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if 
(display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame) + s->FullFrameMALLPStateMethod = true; + if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) { + s->SubViewportMALLPStateMethod = true; + if (!display_cfg->overrides.enable_subvp_implicit_pmo) { + // For dv, small frame tests will have very high refresh rate + unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 / + (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); + if (refresh_rate > 120) + s->SubViewportMALLRefreshGreaterThan120Hz = true; + } + } + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) + s->PhantomPipeMALLPStateMethod = true; + } + mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) || + (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod); + dml2_printf("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod); + dml2_printf("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod); + dml2_printf("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz); + dml2_printf("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState); +#endif + + //Re-ordering Buffer Support Check + + mode_lib->ms.support.max_non_urgent_latency_us + = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + + mode_lib->ms.support.max_urgent_latency_us + = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 + / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)) >= mode_lib->ms.support.max_urgent_latency_us) { + mode_lib->ms.support.ROBSupport = true; + } else { + mode_lib->ms.support.ROBSupport = false; + } 
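+ /*
+  * Illustrative check of the DCN4 ROB condition above, using assumed
+  * example numbers (not SoC defaults): rob_buffer_size_kbytes = 128,
+  * pixel_chunk_size_kbytes = 8, DCFCLK = 1200 MHz and
+  * return_bus_width_bytes = 64 give a ROB drain time of
+  * (128 - 8) * 1024 / (1200 * 64) = 1.6 us, so ROBSupport holds only
+  * when the max_urgent_latency_us computed above is at most 1.6 us.
+  */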
+ } else {
+ if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
+ mode_lib->ms.support.ROBSupport = true;
+ } else {
+ mode_lib->ms.support.ROBSupport = false;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index);
+ dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
+ dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
+ dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
+ dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.max_urgent_latency_us);
+ dml2_printf("DML::%s: urgent latency tolerance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)));
+ dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport);
+#endif
+
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) {
+ if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
+ / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)) >= mode_lib->ms.support.max_non_urgent_latency_us) {
+ mode_lib->ms.support.ROBUrgencyAvoidance = true;
+ } else {
+ mode_lib->ms.support.ROBUrgencyAvoidance = false;
+ }
+ } else {
+ mode_lib->ms.support.ROBUrgencyAvoidance = true;
+ }
+
+ mode_lib->ms.support.OutstandingRequestsSupport = true;
+ mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
+
+ mode_lib->ms.support.avg_urgent_latency_us
+ = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0)
+ + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0);
+
+ mode_lib->ms.support.avg_non_urgent_latency_us
+ = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0)
+ + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0);
+
+ double outstanding_latency_us = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) {
+ outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
+ / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
+
+ if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
+ mode_lib->ms.support.OutstandingRequestsSupport = false;
+ }
+
+ if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
+ mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: avg_urgent_latency_us = %f\n", __func__,
mode_lib->ms.support.avg_urgent_latency_us); + dml2_printf("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us); + dml2_printf("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]); + dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us); +#endif + } + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4 && mode_lib->ms.BytePerPixelC[k] > 0) { + outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k] + / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); + + if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) { + mode_lib->ms.support.OutstandingRequestsSupport = false; + } + + if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) { + mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]); + dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us); +#endif + } + } + + memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params)); + if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0; + mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0; + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0; + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0; + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0; + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0; + } + } else { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count; + calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes; + calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes; + calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes; + calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + + calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format; + calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle); + calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; + calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling; + calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall; + + calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; + calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; + 
calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k]; + calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k]; + calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k]; + calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k]; + calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k]; + calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k]; + + calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start; + calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; + calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k]; + calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k]; + calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k]; + calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k]; + calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k]; + calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k]; + + // output + calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k]; + calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k]; + calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k]; + calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k]; + + calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k]; + calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k]; + calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k]; + calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k]; + + calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k]; + calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k]; + calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k]; + calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k]; + + calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k]; + calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k]; + calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k]; + + calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params); + } + + calculate_mall_bw_overhead_factor( + mode_lib->ms.mall_prefetch_sdp_overhead_factor, + mode_lib->ms.mall_prefetch_dram_overhead_factor, + + // input + display_cfg, + mode_lib->ms.num_active_planes); + } + + // Calculate all the bandwidth 
available
+ // Need another bw for latency evaluation
+ calculate_bandwidth_available(
+ mode_lib->ms.support.avg_bandwidth_available_min, // not used
+ mode_lib->ms.support.avg_bandwidth_available, // not used
+ mode_lib->ms.support.urg_bandwidth_available_min_latency,
+ mode_lib->ms.support.urg_bandwidth_available, // not used
+ mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used
+ mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used
+
+ &mode_lib->soc,
+ display_cfg->hostvm_enable,
+ mode_lib->ms.DCFCLK,
+ mode_lib->ms.FabricClock,
+ mode_lib->ms.dram_bw_mbps);
+
+ calculate_bandwidth_available(
+ mode_lib->ms.support.avg_bandwidth_available_min,
+ mode_lib->ms.support.avg_bandwidth_available,
+ mode_lib->ms.support.urg_bandwidth_available_min,
+ mode_lib->ms.support.urg_bandwidth_available,
+ mode_lib->ms.support.urg_bandwidth_available_vm_only,
+ mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm,
+
+ &mode_lib->soc,
+ display_cfg->hostvm_enable,
+ mode_lib->ms.MaxDCFCLK,
+ mode_lib->ms.MaxFabricClock,
+ mode_lib->ms.dram_bw_mbps);
+
+
+ // Average BW support check
+ calculate_avg_bandwidth_required(
+ mode_lib->ms.support.avg_bandwidth_required,
+ // input
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.SurfaceReadBandwidthLuma,
+ mode_lib->ms.SurfaceReadBandwidthChroma,
+ mode_lib->ms.cursor_bw,
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
+ mode_lib->ms.mall_prefetch_dram_overhead_factor,
+ mode_lib->ms.mall_prefetch_sdp_overhead_factor);
+
+ for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram
+ mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1;
+ mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]);
+ mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]);
+ }
+
+ mode_lib->ms.support.AvgBandwidthSupport = true;
+ mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) {
+ mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false;
+ dml2_printf("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k);
+
+ }
+ }
+ for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+ for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
+ if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) {
+ mode_lib->ms.support.AvgBandwidthSupport = false;
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n));
+#endif
+ }
+ }
+ }
+
+ /* Prefetch Check */
+ {
+ mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep;
+
+
+ calculate_hostvm_inefficiency_factor(
+ &s->HostVMInefficiencyFactor,
+ &s->HostVMInefficiencyFactorPrefetch,
+
+ display_cfg->gpuvm_enable,
+ display_cfg->hostvm_enable,
+
mode_lib->ip.remote_iommu_outstanding_translations, + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], + mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); + + mode_lib->ms.Total3dlutActive = 0; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1; + + // Calculate tdlut schedule related terms + calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK; + calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; + calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; + calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; + calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag; + calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling); + + // output + calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; + calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; + calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; + + calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); + } + + double min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + + CalculateExtraLatency( + display_cfg, + mode_lib->ip.rob_buffer_size_kbytes, + 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + s->ReorderingBytes, + mode_lib->ms.DCFCLK, + mode_lib->ms.FabricClock, + mode_lib->ip.pixel_chunk_size_kbytes, + min_return_bw_for_latency, + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPP, + mode_lib->ms.dpte_group_bytes, + s->tdlut_bytes_per_group, + s->HostVMInefficiencyFactor, + s->HostVMInefficiencyFactorPrefetch, + mode_lib->soc.hostvm_min_page_size_kbytes, + mode_lib->soc.qos_parameters.qos_type, + !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.request_size_bytes_luma, + mode_lib->ms.support.request_size_bytes_chroma, + mode_lib->ip.meta_chunk_size_kbytes, + mode_lib->ip.dchub_arb_to_ret_delay, + mode_lib->ms.TripToMemory, + mode_lib->ip.hostvm_mode, + + // output + &mode_lib->ms.ExtraLatency, + &mode_lib->ms.ExtraLatency_sr, + &mode_lib->ms.ExtraLatencyPrefetch); + + { + mode_lib->ms.support.PrefetchSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + + mode_lib->ms.TWait[k] = CalculateTWait( + display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, + mode_lib->ms.UrgLatency, + 
mode_lib->ms.TripToMemory); + + struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; + myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; + myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; + myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k]; + myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; + myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; + myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; + myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; + myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; + myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + myPipe->ODMMode = mode_lib->ms.ODMMode[k]; + myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); + dml2_printf("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); +#endif + CalculatePrefetchSchedule_params->display_cfg = display_cfg; + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k]; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / 
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; + CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[k]; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch; + CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; + CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k]; + CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory; + CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; + CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; + CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); + CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; + CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; + CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; + CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; + 
CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; + + // output + CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k]; + CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k]; + CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; + CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; + CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; + CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; + CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; + CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; + CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; + CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; + + mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); + + mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k]; + dml2_printf("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); + dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); + } // for k num_planes + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.dst_y_prefetch[k] < 2.0 + || mode_lib->ms.LinesForVM[k] >= 32.0 + || mode_lib->ms.LinesForDPTERow[k] >= 16.0 + || mode_lib->ms.NoTimeForPrefetch[k] == true + || s->DSTYAfterScaler[k] > 8) { + mode_lib->ms.support.PrefetchSupported = false; + dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); + dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); + dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); + dml2_printf("DML::%s: k=%d, 
NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); + dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + } + } + + mode_lib->ms.support.DynamicMetadataSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) { + mode_lib->ms.support.DynamicMetadataSupported = false; + } + } + + mode_lib->ms.support.VRatioInPrefetchSupported = true; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || + mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) { + mode_lib->ms.support.VRatioInPrefetchSupported = false; + dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); + } + } + + s->AnyLinesForVMOrRowTooLarge = false; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.LinesForDPTERow[k] >= 16 || mode_lib->ms.LinesForVM[k] >= 32) { + s->AnyLinesForVMOrRowTooLarge = true; + } + } + + // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok + if (mode_lib->ms.support.PrefetchSupported) { + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + // Calculate Urgent burst factor for prefetch +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); + dml2_printf("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); + dml2_printf("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); +#endif + double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->ms.swath_width_luma_ub[k], + mode_lib->ms.swath_width_chroma_ub[k], + mode_lib->ms.SwathHeightY[k], + mode_lib->ms.SwathHeightC[k], + line_time_us, + mode_lib->ms.UrgLatency, + mode_lib->ms.VRatioPreY[k], + mode_lib->ms.VRatioPreC[k], + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeY[k], + mode_lib->ms.DETBufferSizeC[k], + /* Output */ + &mode_lib->ms.UrgentBurstFactorLumaPre[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], + &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + + // Calculate urgent bandwidth required, both urg and non urg peak bandwidth + // assume flip bw is 0 at this point + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + mode_lib->ms.final_flip_bw[k] = 0; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + mode_lib->ms.support.urg_vactive_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_required, + mode_lib->ms.support.non_urg_bandwidth_required, + + display_cfg, + 0, // inc_flip_bw + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPP, + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0, + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1, + mode_lib->ms.mall_prefetch_sdp_overhead_factor, + mode_lib->ms.mall_prefetch_dram_overhead_factor, + + mode_lib->ms.SurfaceReadBandwidthLuma, + mode_lib->ms.SurfaceReadBandwidthChroma, + 
mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.dpte_row_bw, + mode_lib->ms.meta_row_bw, + mode_lib->ms.prefetch_cursor_bw, + mode_lib->ms.prefetch_vmrow_bw, + mode_lib->ms.final_flip_bw, + mode_lib->ms.UrgentBurstFactorLuma, + mode_lib->ms.UrgentBurstFactorChroma, + mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre); + + // Check urg peak bandwidth against available urg bw + // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) + check_urgent_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth + &s->dummy_single[1], // double* frac_urg_bandwidth_mall + &mode_lib->ms.support.UrgVactiveBandwidthSupport, + &mode_lib->ms.support.PrefetchBandwidthSupported, + + mode_lib->soc.mall_allocated_for_dcn_mbytes, + mode_lib->ms.support.non_urg_bandwidth_required, + mode_lib->ms.support.urg_vactive_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported; + dml2_printf("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) { + mode_lib->ms.support.PrefetchSupported = false; + dml2_printf("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + } + + + // Both prefetch schedule and BW okay + if (mode_lib->ms.support.PrefetchSupported == true && mode_lib->ms.support.VRatioInPrefetchSupported == true) { + mode_lib->ms.BandwidthAvailableForImmediateFlip = + get_bandwidth_available_for_immediate_flip(dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required, // no flip + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip) { + s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( + s->HostVMInefficiencyFactor, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.meta_row_bytes[k]); + } else { + s->per_pipe_flip_bytes[k] = 0; + } + mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; + + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + CalculateFlipSchedule( + &mode_lib->scratch, + display_cfg->plane_descriptors[k].immediate_flip, + 1, // use_lb_flip_bw + s->HostVMInefficiencyFactor, + s->Tvm_trips_flip[k], + s->Tr0_trips_flip[k], + s->Tvm_trips_flip_rounded[k], + s->Tr0_trips_flip_rounded[k], + display_cfg->gpuvm_enable, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.BandwidthAvailableForImmediateFlip, + mode_lib->ms.TotImmediateFlipBytes, + display_cfg->plane_descriptors[k].pixel_format, + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ms.Tno_bw_flip[k], + 
mode_lib->ms.dpte_row_height[k], + mode_lib->ms.dpte_row_height_chroma[k], + mode_lib->ms.use_one_row_for_frame_flip[k], + mode_lib->ip.max_flip_time_us, + s->per_pipe_flip_bytes[k], + mode_lib->ms.meta_row_bytes[k], + s->meta_row_height_luma[k], + s->meta_row_height_chroma[k], + mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, + + /* Output */ + &mode_lib->ms.dst_y_per_vm_flip[k], + &mode_lib->ms.dst_y_per_row_flip[k], + &mode_lib->ms.final_flip_bw[k], + &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); + } + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + s->dummy_bw, + mode_lib->ms.support.urg_bandwidth_required_flip, + mode_lib->ms.support.non_urg_bandwidth_required_flip, + + // Input + display_cfg, + 1, // inc_flip_bw + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPP, + + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, + mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0, + mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1, + mode_lib->ms.mall_prefetch_sdp_overhead_factor, + mode_lib->ms.mall_prefetch_dram_overhead_factor, + + mode_lib->ms.SurfaceReadBandwidthLuma, + mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->ms.RequiredPrefetchPixelDataBWLuma, + mode_lib->ms.RequiredPrefetchPixelDataBWChroma, + mode_lib->ms.cursor_bw, + mode_lib->ms.dpte_row_bw, + mode_lib->ms.meta_row_bw, + mode_lib->ms.prefetch_cursor_bw, + mode_lib->ms.prefetch_vmrow_bw, + mode_lib->ms.final_flip_bw, + mode_lib->ms.UrgentBurstFactorLuma, + mode_lib->ms.UrgentBurstFactorChroma, + mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLumaPre, + mode_lib->ms.UrgentBurstFactorChromaPre, + mode_lib->ms.UrgentBurstFactorCursorPre); + + calculate_immediate_flip_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth_flip + &mode_lib->ms.support.ImmediateFlipSupport, + + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_flip, + mode_lib->ms.support.non_urg_bandwidth_required_flip, + mode_lib->ms.support.urg_bandwidth_available); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) + mode_lib->ms.support.ImmediateFlipSupport = false; + } + + } else { // if prefetch not support, assume iflip is not supported too + mode_lib->ms.support.ImmediateFlipSupport = false; + } + } // prefetch schedule + } + + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + mode_lib->ms.use_one_row_for_frame[k] = mode_lib->ms.use_one_row_for_frame[k]; + } + + s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; + s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; + s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr; + s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; + s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; + s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; + s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitZ8Time = 
mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; + s->mSOCParameters.USRRetrainingLatency = 0; // FIXME_STAGE2: no USR related bbox value + s->mSOCParameters.SMNLatency = 0; // FIXME_STAGE2 + + CalculateWatermarks_params->display_cfg = display_cfg; + CalculateWatermarks_params->USRRetrainingRequired = false /*FIXME_STAGE2 was: mode_lib->ms.policy.USRRetrainingRequired, no new dml2 replacement*/; + CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK; + CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; + CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; + CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; + //CalculateWatermarks_params->LBBitPerPixel = 57; // FIXME_STAGE2, need a new ip param? 
+ CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; + CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; + CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte; + CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma; + CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma; + + // Output + CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport; + CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport; + CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport; + CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin; + CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + } + + // End of Prefetch Check + + dml2_printf("DML::%s: Done prefetch calculation\n", __func__); + + /*Mode Support, Voltage State and SOC Configuration*/ + { + // s->dram_clock_change_support = 1; + // s->f_clock_change_support = 1; + + if (mode_lib->ms.support.ScaleRatioAndTapsSupport + && mode_lib->ms.support.SourceFormatPixelAndScanSupport + && mode_lib->ms.support.ViewportSizeSupport + && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion + && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated + && !mode_lib->ms.support.BPPForMultistreamNotIndicated + && !mode_lib->ms.support.MultistreamWithHDMIOreDP + && !mode_lib->ms.support.ExceededMultistreamSlots + && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink + && !mode_lib->ms.support.NotEnoughLanesForMSO + //&& mode_lib->ms.support.LinkCapacitySupport == true // FIXME_STAGE2 + && !mode_lib->ms.support.P2IWith420 + && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP + && !mode_lib->ms.support.DSC422NativeNotSupported + && !mode_lib->ms.support.NotEnoughDSCUnits + && !mode_lib->ms.support.NotEnoughDSCSlices + && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe + && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen + && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported + && 
mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport + && !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported + && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState + && mode_lib->ms.support.ROBSupport + && mode_lib->ms.support.ROBUrgencyAvoidance + && mode_lib->ms.support.OutstandingRequestsSupport + && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance + && mode_lib->ms.support.DISPCLK_DPPCLK_Support + && mode_lib->ms.support.TotalAvailablePipesSupport + && mode_lib->ms.support.NumberOfOTGSupport + && mode_lib->ms.support.NumberOfHDMIFRLSupport + && mode_lib->ms.support.NumberOfDP2p0Support + && mode_lib->ms.support.EnoughWritebackUnits + && mode_lib->ms.support.WritebackLatencySupport + && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport + && mode_lib->ms.support.CursorSupport + && mode_lib->ms.support.PitchSupport + && !mode_lib->ms.support.ViewportExceedsSurface + && mode_lib->ms.support.PrefetchSupported + && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport + && mode_lib->ms.support.AvgBandwidthSupport + && mode_lib->ms.support.DynamicMetadataSupported + && mode_lib->ms.support.VRatioInPrefetchSupported + && mode_lib->ms.support.PTEBufferSizeNotExceeded + && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded + && !mode_lib->ms.support.ExceededMALLSize + && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) { + // && s->dram_clock_change_support == true + // && s->f_clock_change_support == true + // && (/*FIXME_STAGE2 was: mode_lib->ms.policy.USRRetrainingRequired, no new dml2 replacement || */ mode_lib->ms.support.USRRetrainingSupport)) { + dml2_printf("DML::%s: mode is supported\n", __func__); + mode_lib->ms.support.ModeSupport = true; + } else { + dml2_printf("DML::%s: mode is NOT supported\n", __func__); + mode_lib->ms.support.ModeSupport = false; + } + } + + // Since mode_support now works on one particular power state, there is only one state idx (index 0). 
+ dml2_printf("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport); + dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k]; + mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k]; + } + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].stream_index == k) { + mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k]; + } else { + mode_lib->ms.support.ODMMode[k] = dml2_odm_mode_bypass; + } + + mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k]; + mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k]; + mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k]; + mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k]; + mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k]; + +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]); + dml2_printf("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]); +#endif + } + +#if defined(__DML_VBA_DEBUG__) + if (!mode_lib->ms.support.ModeSupport) + dml2_print_dml_mode_support_info(&mode_lib->ms.support, true); + dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, mode_lib->ms.support.ModeSupport, in_out_params->min_clk_index); + dml2_printf("DML::%s: --- DONE --- \n", __func__); +#endif + + if (mode_lib->ms.support.ModeSupport) { + *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; + return true; + } else { + return false; + } +} + +static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +{ + dml2_printf("DML: ===================================== \n"); + dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = 0x%x\n", support->ImmediateFlipSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = 0x%x\n", support->WritebackLatencySupport); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: ScaleRatioAndTapsSupport = 0x%x\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + dml2_printf("DML: support: SourceFormatPixelAndScanSupport = 0x%x\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = 0x%x\n", support->P2IWith420); + if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1) + dml2_printf("DML: support: DSCOnlyIfNecessaryWithBPP = 0x%x\n", support->DSCOnlyIfNecessaryWithBPP); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml2_printf("DML: support: DSC422NativeNotSupported = 0x%x\n", support->DSC422NativeNotSupported); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = 0x%x\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) + dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = 0x%x\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + dml2_printf("DML: support: 
BPPForMultistreamNotIndicated = 0x%x\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + dml2_printf("DML: support: MultistreamWithHDMIOreDP = 0x%x\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = 0x%x\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + dml2_printf("DML: support: NotEnoughLanesForMSO = 0x%x\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = 0x%x\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = 0x%x\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = 0x%x\n", support->NumberOfDP2p0Support); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = 0x%x\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->CursorSupport == 0) + dml2_printf("DML: support: CursorSupport = 0x%x\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = 0x%x\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = 0x%x\n", support->ViewportExceedsSurface); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = 0x%x\n", support->ExceededMALLSize); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = 0x%x\n", support->EnoughWritebackUnits); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = 0x%x\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = 0x%x\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = 0x%x\n", support->InvalidCombinationOfMALLUseForPState); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = 0x%x\n", support->ExceededMultistreamSlots); + if (!fail_only || support->NotEnoughDSCUnits == 1) + dml2_printf("DML: support: NotEnoughDSCUnits = 0x%x\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + dml2_printf("DML: support: NotEnoughDSCSlices = 0x%x\n", support->NotEnoughDSCSlices); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = 0x%x\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = 0x%x\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = 0x%x\n", 
support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = 0x%x\n", support->LinkCapacitySupport); + if (!fail_only || support->ROBSupport == 0) + dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + if (!fail_only || support->ROBUrgencyAvoidance == 0) + dml2_printf("DML: support: ROBUrgencyAvoidance = %d\n", support->ROBUrgencyAvoidance); + if (!fail_only || support->OutstandingRequestsSupport == 0) + dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) + dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + if (!fail_only || support->PTEBufferSizeNotExceeded == 0) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->PrefetchSupported == 0) + dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->DynamicMetadataSupported == 0) + dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->ModeSupport == 0) + dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); + if (!fail_only || support->ViewportSizeSupport == 0) + dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + dml2_printf("DML: ===================================== \n"); +} + +static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) +{ + for (unsigned int k = 0; k < display_cfg->num_planes; k++) { + double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { + switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { + case dml2_444: + out_bpp[k] = bpc * 3; + break; + case dml2_s422: + out_bpp[k] = bpc * 2; + break; + case dml2_n422: + out_bpp[k] = bpc * 2; + break; + case dml2_420: + default: + out_bpp[k] = bpc * 1.5; + break; + } + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { + out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; + } else { + out_bpp[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + 
dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); +#endif + } +} + +static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up) +{ + unsigned int remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) +{ + unsigned int num_active_pipes = 0; + + for (unsigned int k = 0; k < num_planes; k++) { + num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); +#endif + return num_active_pipes; +} + +static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) +{ + unsigned int pipe_idx = 0; + + for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { + pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; + } + + for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { + for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + +static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) +{ + bool is_phantom = false; + + if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || + plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { + is_phantom = true; + } + + return is_phantom; +} + +static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) +{ + unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; + + bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]); + dml2_printf("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); + return is_phantom; +} + +static void CalculateMaxDETAndMinCompressedBufferSize( + unsigned int ConfigReturnBufferSizeInKByte, + unsigned int ConfigReturnBufferSegmentSizeInKByte, + unsigned int ROBBufferSizeInKByte, + unsigned int MaxNumDPP, + unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size + unsigned int nomDETInKByteOverrideValue, // VBA_DELTA + bool is_mrq_present, + + // Output + unsigned int *MaxTotalDETInKByte, + unsigned int *nomDETInKByte, + unsigned int *MinCompressedBufferSizeInKByte) +{ + if (is_mrq_present) + *MaxTotalDETInKByte = (unsigned int)math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte) * 4 / 5, 64); + else + *MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte; + + *nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); + *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; + +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present); + dml2_printf("DML::%s: 
ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + dml2_printf("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); + dml2_printf("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); + dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); + dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); + dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); +#endif + + if (nomDETInKByteOverrideEnable) { + *nomDETInKByte = nomDETInKByteOverrideValue; + dml2_printf("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); + } +} + +static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd) +{ + //unsigned int num_active_planes = display_cfg->num_planes; + + //Progressive To Interlace Unit Effect + for (unsigned int k = 0; k < display_cfg->num_planes; ++k) { + PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) { + // FIXME_STAGE2... can sw pass the pixel rate for interlaced directly + //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz; + } + } +} + +bool dml2_core_shared_is_420(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 1; + break; + case dml2_420_10: + val = 1; + break; + case dml2_420_12: + val = 1; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + default: + DML2_ASSERT(0); + break; + } + return val; +} + +static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) +{ + switch (sw_mode) { + case (dml2_sw_linear): + return 256; break; + case (dml2_sw_256b_2d): + return 256; break; + case (dml2_sw_4kb_2d): + return 4096; break; + case (dml2_sw_64kb_2d): + return 65536; break; + case (dml2_sw_256kb_2d): + return 262144; break; + case (dml2_gfx11_sw_linear): + return 256; break; + case (dml2_gfx11_sw_64kb_d): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_t): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_x): + return 65536; break; + case (dml2_gfx11_sw_64kb_r_x): + return 65536; break; + case (dml2_gfx11_sw_256kb_d_x): + return 262144; break; + case (dml2_gfx11_sw_256kb_r_x): + return 262144; break; + default: + DML2_ASSERT(0); + return 256; + }; +} + +const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); break; + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); break; + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); break; + default: + return("dml2_core_internal_bw_unknown"); break; + }; +} + +const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type 
dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); break; + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); break; + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); break; + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); break; + }; +} + +static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan) +{ + bool is_vert = false; + if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} + +static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode) +{ + int unsigned version = 0; + + if (sw_mode == dml2_sw_linear || + sw_mode == dml2_sw_256b_2d || + sw_mode == dml2_sw_4kb_2d || + sw_mode == dml2_sw_64kb_2d || + sw_mode == dml2_sw_256kb_2d) { + version = 12; + } else if (sw_mode == dml2_gfx11_sw_linear || + sw_mode == dml2_gfx11_sw_64kb_d || + sw_mode == dml2_gfx11_sw_64kb_d_t || + sw_mode == dml2_gfx11_sw_64kb_d_x || + sw_mode == dml2_gfx11_sw_64kb_r_x || + sw_mode == dml2_gfx11_sw_256kb_d_x || + sw_mode == dml2_gfx11_sw_256kb_r_x) { + version = 11; + } else { + dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML2_ASSERT(0); + } + + return version; +} + +static void CalculateBytePerPixelAndBlockSizes( + enum dml2_source_format_class SourcePixelFormat, + enum dml2_swizzle_mode SurfaceTiling, + unsigned int pitch_y, + unsigned int pitch_c, + + // Output + unsigned int *BytePerPixelY, + unsigned int *BytePerPixelC, + double *BytePerPixelDETY, + double *BytePerPixelDETC, + unsigned int *BlockHeight256BytesY, + unsigned int *BlockHeight256BytesC, + unsigned int *BlockWidth256BytesY, + unsigned int *BlockWidth256BytesC, + unsigned int *MacroTileHeightY, + unsigned int *MacroTileHeightC, + unsigned int *MacroTileWidthY, + unsigned int *MacroTileWidthC, + bool *surf_linear128_l, + bool *surf_linear128_c) +{ + *BytePerPixelDETY = 0; + *BytePerPixelDETC = 0; + *BytePerPixelY = 0; + *BytePerPixelC = 0; + + if (SourcePixelFormat == dml2_444_64) { + *BytePerPixelDETY = 8; + *BytePerPixelDETC = 0; + *BytePerPixelY = 8; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 0; + *BytePerPixelY = 4; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 0; + *BytePerPixelY = 2; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 0; + *BytePerPixelY = 1; + *BytePerPixelC = 0; + } else if (SourcePixelFormat == dml2_rgbe_alpha) { + *BytePerPixelDETY = 4; + *BytePerPixelDETC = 1; + *BytePerPixelY = 4; + *BytePerPixelC = 1; + } else if (SourcePixelFormat == dml2_420_8) { + *BytePerPixelDETY = 1; + *BytePerPixelDETC = 2; + *BytePerPixelY = 1; + *BytePerPixelC = 2; + } else if (SourcePixelFormat == dml2_420_12) { + *BytePerPixelDETY = 2; + *BytePerPixelDETC = 4; + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } else if (SourcePixelFormat == dml2_420_10) { + *BytePerPixelDETY = (double)(4.0 / 3); + *BytePerPixelDETC = (double)(8.0 / 3); + *BytePerPixelY = 2; + *BytePerPixelC = 4; + } else { + dml2_printf("ERROR: DML::%s: 
SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat); + DML2_ASSERT(0); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); + dml2_printf("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); + dml2_printf("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); + dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); + dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); + dml2_printf("DML::%s: pitch_y = %u\n", __func__, pitch_y); + dml2_printf("DML::%s: pitch_c = %u\n", __func__, pitch_c); + dml2_printf("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l); + dml2_printf("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c); +#endif + + if (dml_get_gfx_version(SurfaceTiling) == 11) { + *surf_linear128_l = 0; + *surf_linear128_c = 0; + } else { + if (SurfaceTiling == dml2_sw_linear) { + *surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0); + + if (dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) + *surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0); + } + } + + if (!(dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) { + if (SurfaceTiling == dml2_sw_linear) { + *BlockHeight256BytesY = 1; + } else if (SourcePixelFormat == dml2_444_64) { + *BlockHeight256BytesY = 4; + } else if (SourcePixelFormat == dml2_444_8) { + *BlockHeight256BytesY = 16; + } else { + *BlockHeight256BytesY = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockHeight256BytesC = 0; + *BlockWidth256BytesC = 0; + } else { // dual plane + if (SurfaceTiling == dml2_sw_linear) { + *BlockHeight256BytesY = 1; + *BlockHeight256BytesC = 1; + } else if (SourcePixelFormat == dml2_rgbe_alpha) { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 16; + } else if (SourcePixelFormat == dml2_420_8) { + *BlockHeight256BytesY = 16; + *BlockHeight256BytesC = 8; + } else { + *BlockHeight256BytesY = 8; + *BlockHeight256BytesC = 8; + } + *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; + *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); + dml2_printf("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); + dml2_printf("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); + dml2_printf("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); +#endif + + if (dml_get_gfx_version(SurfaceTiling) == 11) { + if (SurfaceTiling == dml2_gfx11_sw_linear) { + *MacroTileHeightY = *BlockHeight256BytesY; + *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; + } + } else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) { + *MacroTileHeightY = 16 * *BlockHeight256BytesY; + *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 16 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; + } + } else { + *MacroTileHeightY = 32 * 
*BlockHeight256BytesY; + *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = 32 * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; + } + } + } else { + unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling); + unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling + + if (SurfaceTiling == dml2_sw_linear) { + macro_tile_scale = 1; + } else if (SurfaceTiling == dml2_sw_4kb_2d) { + macro_tile_scale = 4; + } else if (SurfaceTiling == dml2_sw_64kb_2d) { + macro_tile_scale = 16; + } else if (SurfaceTiling == dml2_sw_256kb_2d) { + macro_tile_scale = 32; + } else { + dml2_printf("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling); + DML2_ASSERT(0); + } + + *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY; + *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY; + *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC; + if (*MacroTileHeightC == 0) { + *MacroTileWidthC = 0; + } else { + *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); + dml2_printf("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); + dml2_printf("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); + dml2_printf("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); +#endif +} + +static void CalculateSinglePipeDPPCLKAndSCLThroughput( + double HRatio, + double HRatioChroma, + double VRatio, + double VRatioChroma, + double MaxDCHUBToPSCLThroughput, + double MaxPSCLToLBThroughput, + double PixelClock, + enum dml2_source_format_class SourcePixelFormat, + unsigned int HTaps, + unsigned int HTapsChroma, + unsigned int VTaps, + unsigned int VTapsChroma, + + // Output + double *PSCL_THROUGHPUT, + double *PSCL_THROUGHPUT_CHROMA, + double *DPPCLKUsingSingleDPP) +{ + if (HRatio > 1) { + *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0)); + } else { + *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); + } + + double DPPCLKUsingSingleDPPLuma; + double DPPCLKUsingSingleDPPChroma; + + DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1); + + if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) + DPPCLKUsingSingleDPPLuma = 2 * PixelClock; + + if (!dml2_core_shared_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) { + *PSCL_THROUGHPUT_CHROMA = 0; + *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; + } else { + if (HRatioChroma > 1) { + *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0)); + } else { + *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); + } + DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma), + HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); + if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) + DPPCLKUsingSingleDPPChroma = 2 * PixelClock; + *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); + } +} + +static void CalculateSwathWidth( + const struct 
dml2_display_cfg *display_cfg, + bool ForceSingleDPP, + unsigned int NumberOfActiveSurfaces, + enum dml2_odm_mode ODMMode[], + unsigned int BytePerPixY[], + unsigned int BytePerPixC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + bool surf_linear128_l[], + bool surf_linear128_c[], + unsigned int DPPPerSurface[], + + // Output + unsigned int req_per_swath_ub_l[], + unsigned int req_per_swath_ub_c[], + unsigned int SwathWidthSingleDPPY[], + unsigned int SwathWidthSingleDPPC[], + unsigned int SwathWidthY[], // per-pipe + unsigned int SwathWidthC[], // per-pipe + unsigned int MaximumSwathHeightY[], + unsigned int MaximumSwathHeightC[], + unsigned int swath_width_luma_ub[], // per-pipe + unsigned int swath_width_chroma_ub[]) // per-pipe +{ + enum dml2_odm_mode MainSurfaceODMMode; + double odm_hactive_factor = 1.0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); +#endif + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + } else { + SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + dml2_printf("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); +#endif + + MainSurfaceODMMode = ODMMode[k]; + for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) { + if (display_cfg->plane_descriptors[k].stream_index == j) { + MainSurfaceODMMode = ODMMode[j]; + } + } + + if (ForceSingleDPP) { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + } else { + if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1) + odm_hactive_factor = 4.0; + else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1) + odm_hactive_factor = 3.0; + else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1) + odm_hactive_factor = 2.0; + + if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) { + SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio))); + } else if (DPPPerSurface[k] == 2) { + SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u HActive=%u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active); + dml2_printf("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + dml2_printf("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); + dml2_printf("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", 
__func__, k, SwathWidthSingleDPPY[k]); + dml2_printf("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); +#endif + + if (dml2_core_shared_is_420(display_cfg->plane_descriptors[k].pixel_format)) { + SwathWidthC[k] = SwathWidthY[k] / 2; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; + } else { + SwathWidthC[k] = SwathWidthY[k]; + SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; + } + + if (ForceSingleDPP == true) { + SwathWidthY[k] = SwathWidthSingleDPPY[k]; + SwathWidthC[k] = SwathWidthSingleDPPC[k]; + } + + unsigned int req_width_horz_y = Read256BytesBlockWidthY[k]; + unsigned int req_width_horz_c = Read256BytesBlockWidthC[k]; + + if (surf_linear128_l[k]) + req_width_horz_y = req_width_horz_y / 2; + + if (surf_linear128_c[k]) + req_width_horz_c = req_width_horz_c / 2; + + unsigned int surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y); + unsigned int surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]); + unsigned int surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c); + unsigned int surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); + dml2_printf("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); + dml2_printf("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); + dml2_printf("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); + dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + dml2_printf("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); + dml2_printf("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); + dml2_printf("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); + dml2_printf("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); + dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + dml2_printf("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary); + dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); +#endif + + req_per_swath_ub_l[k] = 0; + req_per_swath_ub_c[k] = 0; + if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { + MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y))); + } else { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, 
math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y)); + } + req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y; + + if (BytePerPixC[k] > 0) { + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c))); + } else { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c)); + } + req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c; + } else { + swath_width_chroma_ub[k] = 0; + } + } else { + MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; + MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; + + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k]))); + } else { + swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k])); + } + req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k]; + if (BytePerPixC[k] > 0) { + if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k]))); + } else { + swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k])); + } + req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k]; + } else { + swath_width_chroma_ub[k] = 0; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); + dml2_printf("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); + dml2_printf("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); + dml2_printf("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); + dml2_printf("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]); + dml2_printf("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]); +#endif + + } +} + +static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear) +{ + bool unb_req_ok = false; + bool unb_req_en = false; + + unb_req_ok = (TotalNumberOfActiveDPP == 1 && NoChromaOrLinear); + unb_req_en = unb_req_ok; + + if (unb_req_force_en) { + unb_req_en = unb_req_force_val && unb_req_ok; + } +#ifdef __DML_VBA_DEBUG__ + 
dml2_printf("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en); + dml2_printf("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val); + dml2_printf("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok); + dml2_printf("DML::%s: unb_req_en = %u\n", __func__, unb_req_en); +#endif + return (unb_req_en); +} + +static void CalculateDETBufferSize(struct dml2_core_shared_calculate_det_buffer_size_params *p) +{ + unsigned int DETBufferSizePoolInKByte; + unsigned int NextDETBufferPieceInKByte; + bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES]; + bool NextPotentialSurfaceToAssignDETPieceFound; + unsigned int NextSurfaceToAssignDETPiece; + double TotalBandwidth; + double BandwidthOfSurfacesNotAssignedDETPiece; + unsigned int max_minDET; + unsigned int minDET; + unsigned int minDET_pipe; + unsigned int TotalBandwidthPerStream[DML2_MAX_PLANES] = { 0 }; + unsigned int TotalPixelRate = 0; + unsigned int DETBudgetPerStream[DML2_MAX_PLANES] = { 0 }; + unsigned int RemainingDETBudgetPerStream[DML2_MAX_PLANES] = { 0 }; + unsigned int IdealDETBudget, DeltaDETBudget; + bool MinimizeReallocationSuccess = false; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); + dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); + dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, p->NumberOfActiveSurfaces); + dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); + dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, p->MaxTotalDETInKByte); + dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); + dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, p->MinCompressedBufferSizeInKByte); + dml2_printf("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, p->CompressedBufferSegmentSizeInkByte); +#endif + + // Note: Will use default det size if that fits 2 swaths + if (p->UnboundedRequestEnabled) { + if (p->display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) { + p->DETBufferSizeInKByte[0] = p->display_cfg->plane_descriptors[0].overrides.det_size_override_kb; + } else { + p->DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)p->full_swath_bytes_l[0] + (double)p->full_swath_bytes_c[0]) / 1024.0, p->ConfigReturnBufferSegmentSizeInkByte)); + } + *p->CompressedBufferSizeInkByte = p->ConfigReturnBufferSizeInKByte - p->DETBufferSizeInKByte[0]; + } else { + DETBufferSizePoolInKByte = p->MaxTotalDETInKByte; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DETBufferSizeInKByte[k] = 0; + if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) { + max_minDET = p->nomDETInKByte - p->ConfigReturnBufferSegmentSizeInkByte; + } else { + max_minDET = p->nomDETInKByte; + } + minDET = 128; + minDET_pipe = 0; + + // add DET resource until can hold 2 full swaths + while (minDET <= max_minDET && minDET_pipe == 0) { + if (2.0 * ((double)p->full_swath_bytes_l[k] + (double)p->full_swath_bytes_c[k]) / 1024.0 <= minDET) + minDET_pipe = minDET; + minDET = minDET + p->ConfigReturnBufferSegmentSizeInkByte; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u minDET = %u\n", __func__, k, minDET); + dml2_printf("DML::%s: k=%u max_minDET = %u\n", __func__, k, max_minDET); + dml2_printf("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, minDET_pipe); + dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, 
p->full_swath_bytes_l[k]); + dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); +#endif + + if (minDET_pipe == 0) { + minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)p->full_swath_bytes_l[k] + (double)p->full_swath_bytes_c[k]) / 1024.0, p->ConfigReturnBufferSegmentSizeInkByte))); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, minDET_pipe); +#endif + } + + if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + p->DETBufferSizeInKByte[k] = 0; + } else if (p->display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) { + p->DETBufferSizeInKByte[k] = p->display_cfg->plane_descriptors[k].overrides.det_size_override_kb; + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]) * p->display_cfg->plane_descriptors[k].overrides.det_size_override_kb; + } else if ((p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { + p->DETBufferSizeInKByte[k] = minDET_pipe; + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]) * minDET_pipe; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); + dml2_printf("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.det_size_override_kb); + dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); + dml2_printf("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, DETBufferSizePoolInKByte); +#endif + } + + if (p->display_cfg->minimize_det_reallocation) { + MinimizeReallocationSuccess = true; + // To minimize det reallocation, we don't distribute DET to each surface in proportion to its share of the global + // bandwidth, but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on + // bandwidth between the planes on the same stream. This ensures that large scale re-distribution only happens on a + // stream count and/or pixel rate change, which is much less likely than general bandwidth changes per plane.
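+ // Illustrative example (not part of the original patch; the numbers are assumed): with two streams whose
+ // pixel clocks are 300000 kHz and 100000 kHz and MaxTotalDETInKByte = 1536, the per-stream budgets computed
+ // below are 1536 * 300000 / 400000 = 1152 KB and 1536 * 100000 / 400000 = 384 KB. Each plane then receives a
+ // share of its stream's budget proportional to ReadBandwidthLuma + ReadBandwidthChroma, rounded down to a
+ // multiple of CompressedBufferSegmentSizeInkByte, provided every plane's minimum already fits in its budget.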
+ + // Calculate total pixel rate + for (unsigned int k = 0; k < p->display_cfg->num_streams; ++k) { + TotalPixelRate += p->display_cfg->stream_descriptors[k].timing.pixel_clock_khz; + } + + // Calculate per stream DET budget + for (unsigned int k = 0; k < p->display_cfg->num_streams; ++k) { + DETBudgetPerStream[k] = (unsigned int)((double)p->display_cfg->stream_descriptors[k].timing.pixel_clock_khz * p->MaxTotalDETInKByte / TotalPixelRate); + RemainingDETBudgetPerStream[k] = DETBudgetPerStream[k]; + } + + // Calculate the per stream total bandwidth + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + TotalBandwidthPerStream[p->display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k]); + + // Check the minimum can be satisfied by budget + if (RemainingDETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index] >= p->DETBufferSizeInKByte[k]) { + RemainingDETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index] -= p->DETBufferSizeInKByte[k]; + } else { + MinimizeReallocationSuccess = false; + break; + } + } + } + + if (MinimizeReallocationSuccess) { + // Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each streams + // budget proportionally across its planes + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + IdealDETBudget = (unsigned int)(((p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k]) / TotalBandwidthPerStream[p->display_cfg->plane_descriptors[k].stream_index]) + * DETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index]); + + if (IdealDETBudget > p->DETBufferSizeInKByte[k]) { + DeltaDETBudget = IdealDETBudget - p->DETBufferSizeInKByte[k]; + if (DeltaDETBudget > RemainingDETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index]) + DeltaDETBudget = RemainingDETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index]; + + p->DETBufferSizeInKByte[k] += DeltaDETBudget; + RemainingDETBudgetPerStream[p->display_cfg->plane_descriptors[k].stream_index] -= DeltaDETBudget; + } + + // Split among the pipes per the plane + p->DETBufferSizeInKByte[k] = (unsigned int)((double)p->DETBufferSizeInKByte[k] / (p->ForceSingleDPP ? 
1 : p->DPPPerSurface[k])); + + // Round down to segment size + p->DETBufferSizeInKByte[k] = (p->DETBufferSizeInKByte[k] / p->CompressedBufferSegmentSizeInkByte) * p->CompressedBufferSegmentSizeInkByte; + } + } + } + } + + if (!MinimizeReallocationSuccess) { + TotalBandwidth = 0; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + TotalBandwidth = TotalBandwidth + p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k]; + } + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: --- Before bandwidth adjustment ---\n", __func__); + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); + } + dml2_printf("DML::%s: --- DET allocation with bandwidth ---\n", __func__); +#endif + dml2_printf("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); + BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + + if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + } else if (p->display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]) * (double)p->DETBufferSizeInKByte[k] / (double)p->MaxTotalDETInKByte) >= ((p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k]) / TotalBandwidth))) { + DETPieceAssignedToThisSurfaceAlready[k] = true; + BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - p->ReadBandwidthLuma[k] - p->ReadBandwidthChroma[k]; + } else { + DETPieceAssignedToThisSurfaceAlready[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); + dml2_printf("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, BandwidthOfSurfacesNotAssignedDETPiece); +#endif + } + + for (unsigned int j = 0; j < p->NumberOfActiveSurfaces; ++j) { + NextPotentialSurfaceToAssignDETPieceFound = false; + NextSurfaceToAssignDETPiece = 0; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, p->ReadBandwidthLuma[k]); + dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, p->ReadBandwidthChroma[k]); + dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, p->ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); + dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, p->ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + dml2_printf("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, NextSurfaceToAssignDETPiece); +#endif + if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound || + p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k] < p->ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + p->ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { + NextSurfaceToAssignDETPiece = k; + NextPotentialSurfaceToAssignDETPieceFound = true; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); + dml2_printf("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, 
NextPotentialSurfaceToAssignDETPieceFound); +#endif + } + + if (NextPotentialSurfaceToAssignDETPieceFound) { + NextDETBufferPieceInKByte = (unsigned int)(math_min2( + math_round((double)DETBufferSizePoolInKByte * (p->ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + p->ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece / + ((p->ForceSingleDPP ? 1 : p->DPPPerSurface[NextSurfaceToAssignDETPiece]) * p->ConfigReturnBufferSegmentSizeInkByte)) + * (p->ForceSingleDPP ? 1 : p->DPPPerSurface[NextSurfaceToAssignDETPiece]) * p->ConfigReturnBufferSegmentSizeInkByte, + math_floor2((double)DETBufferSizePoolInKByte, (p->ForceSingleDPP ? 1 : p->DPPPerSurface[NextSurfaceToAssignDETPiece]) * p->ConfigReturnBufferSegmentSizeInkByte))); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, DETBufferSizePoolInKByte); + dml2_printf("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, NextSurfaceToAssignDETPiece); + dml2_printf("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, p->ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); + dml2_printf("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, p->ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + dml2_printf("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); + dml2_printf("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, NextDETBufferPieceInKByte); + dml2_printf("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, NextSurfaceToAssignDETPiece, p->DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); +#endif + + p->DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = p->DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + NextDETBufferPieceInKByte / (p->ForceSingleDPP ? 
1 : p->DPPPerSurface[NextSurfaceToAssignDETPiece]); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("to %u\n", p->DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); +#endif + + DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; + DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; + BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - (p->ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + p->ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); + } + } + } + *p->CompressedBufferSizeInkByte = p->MinCompressedBufferSizeInKByte; + } + *p->CompressedBufferSizeInkByte = *p->CompressedBufferSizeInkByte * p->CompressedBufferSegmentSizeInkByte / p->ConfigReturnBufferSegmentSizeInkByte; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: --- After bandwidth adjustment ---\n", __func__); + dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, p->DETBufferSizeInKByte[k], p->ReadBandwidthLuma[k] + p->ReadBandwidthChroma[k]); + } +#endif +} + +static double CalculateRequiredDispclk( + enum dml2_odm_mode ODMMode, + double PixelClock) +{ + + if (ODMMode == dml2_odm_mode_combine_4to1) { + return PixelClock / 4.0; + } else if (ODMMode == dml2_odm_mode_combine_3to1) { + return PixelClock / 3.0; + } else if (ODMMode == dml2_odm_mode_combine_2to1) { + return PixelClock / 2.0; + } else { + return PixelClock; + } +} + +static double TruncToValidBPP( + struct dml2_core_shared_TruncToValidBPP_locals *l, + double LinkBitRate, + unsigned int Lanes, + unsigned int HTotal, + unsigned int HActive, + double PixelClock, + double DesiredBPP, + bool DSCEnable, + enum dml2_output_encoder_class Output, + enum dml2_output_format_class Format, + unsigned int DSCInputBitPerComponent, + unsigned int DSCSlices, + unsigned int AudioRate, + unsigned int AudioLayout, + enum dml2_odm_mode ODMModeNoDSC, + enum dml2_odm_mode ODMModeDSC, + + // Output + unsigned int *RequiredSlots) +{ + double MaxLinkBPP; + unsigned int MinDSCBPP; + double MaxDSCBPP; + unsigned int NonDSCBPP0; + unsigned int NonDSCBPP1; + unsigned int NonDSCBPP2; + enum dml2_odm_mode ODMMode; + + if (Format == dml2_420) { + NonDSCBPP0 = 12; + NonDSCBPP1 = 15; + NonDSCBPP2 = 18; + MinDSCBPP = 6; + MaxDSCBPP = 16; + } else if (Format == dml2_444) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 30; + NonDSCBPP2 = 36; + MinDSCBPP = 8; + MaxDSCBPP = 16; + } else { + if (Output == dml2_hdmi || Output == dml2_hdmifrl) { + NonDSCBPP0 = 24; + NonDSCBPP1 = 24; + NonDSCBPP2 = 24; + } else { + NonDSCBPP0 = 16; + NonDSCBPP1 = 20; + NonDSCBPP2 = 24; + } + if (Format == dml2_n422 || Output == dml2_hdmifrl) { + MinDSCBPP = 7; + MaxDSCBPP = 16; + } else { + MinDSCBPP = 8; + MaxDSCBPP = 16; + } + } + if (Output == dml2_dp2p0) { + MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0; + } else if (DSCEnable && Output == dml2_dp) { + MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100); + } else { + MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock; + } + + ODMMode = DSCEnable ? 
ODMModeDSC : ODMModeNoDSC; + + if (ODMMode == dml2_odm_mode_split_1to2) { + MaxLinkBPP = 2 * MaxLinkBPP; + } + + if (DesiredBPP == 0) { + if (DSCEnable) { + if (MaxLinkBPP < MinDSCBPP) { + return __DML2_CALCS_DPP_INVALID__; + } else if (MaxLinkBPP >= MaxDSCBPP) { + return MaxDSCBPP; + } else { + return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0; + } + } else { + if (MaxLinkBPP >= NonDSCBPP2) { + return NonDSCBPP2; + } else if (MaxLinkBPP >= NonDSCBPP1) { + return NonDSCBPP1; + } else if (MaxLinkBPP >= NonDSCBPP0) { + return NonDSCBPP0; + } else { + return __DML2_CALCS_DPP_INVALID__; + } + } + } else { + if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) || + (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { + return __DML2_CALCS_DPP_INVALID__; + } else { + return DesiredBPP; + } + } +} + +// updated for dcn4 +static unsigned int dscceComputeDelay( + unsigned int bpc, + double BPP, + unsigned int sliceWidth, + unsigned int numSlices, + enum dml2_output_format_class pixelFormat, + enum dml2_output_encoder_class Output) +{ + // valid bpc = source bits per component in the set of {8, 10, 12} + // valid bpp = increments of 1/16 of a bit + // min = 6/7/8 in N420/N422/444, respectively + // max = such that compression is 1:1 + //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) + //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} + + // fixed value + unsigned int rcModelSize = 8192; + + // N422/N420 operate at 2 pixels per clock + unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified; + + + if (pixelFormat == dml2_420) + pixelsPerClock = 2; + // #all other modes operate at 1 pixel per clock + else if (pixelFormat == dml2_444) + pixelsPerClock = 1; + else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl) + pixelsPerClock = 2; + else + pixelsPerClock = 1; + + //initial transmit delay as per PPS + initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock)); + + //slice width as seen by dscc_bcl in pixels or pixels pairs (depending on number of pixels per pixel container based on pixel format) + slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth; + + padding_pixels = ((slice_width_modified % 3) != 0) ? 
(3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0; + + if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) { + if ((initial_xmit_delay + padding_pixels) % 3 == 1) { + initial_xmit_delay++; + } + } + + + //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard + if (bpc == 8) + ssm_group_priming_delay = 83; + else if (bpc == 10) + ssm_group_priming_delay = 91; + else if (bpc == 12) + ssm_group_priming_delay = 115; + else if (bpc == 14) + ssm_group_priming_delay = 123; + else + ssm_group_priming_delay = 128; + + //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice + slice_width_groups = (slice_width_modified + 2) / 3; + + //determine number of padded pixels in the last group of a slice line, computed as the group-aligned slice width minus the actual slice width + slice_padded_pixels = 3 * slice_width_groups - slice_width_modified; + + + + + //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered + number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified; + + //increase initial transmit delay by the number of padded pixels added to a slice line multiplied by the integer number of complete lines to reach initial transmit delay + //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay + ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd; + + //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels + ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3; + + //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay + groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay; + + + //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice + //needs to be rounded up as complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice + lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next integer + + //determine if there is a non-zero number of pixels reached in the group where initial transmit delay is reached + //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay + additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 1 : 0; + + //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block) + ssm_pipeline_delay = 2; + + //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block) + obsm_pipeline_delay = 1; + + //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes + if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl) + cycles_per_group = 6; + else + cycles_per_group = 3; + //delay of the bit stream construction layer in pixels is the sum of: + //1.
number of pixel containers in a slice line multipled by the number of lines required to reach initial transmit delay multipled by number of slices to the left of the last horizontal slice + //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice) + //3. additional group of delay if initial transmit delay is reached exactly in a group + //4. ssm and obsm pipeline delay (i.e., clock cycles of delay) + group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay; + pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay; + + //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format) + pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: bpc: %u\n", __func__, bpc); + dml2_printf("DML::%s: BPP: %f\n", __func__, BPP); + dml2_printf("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); + dml2_printf("DML::%s: numSlices: %u\n", __func__, numSlices); + dml2_printf("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); + dml2_printf("DML::%s: Output: %u\n", __func__, Output); + dml2_printf("DML::%s: pixels: %u\n", __func__, pixels); +#endif + return pixels; +} + + +//updated in dcn4 +static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output) +{ + unsigned int Delay = 0; + unsigned int dispclk_per_dscclk = 3; + + // sfr + Delay = Delay + 2; + + if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) { + dispclk_per_dscclk = 3 * 2; + } + + if (pixelFormat == dml2_420) { + //dscc top delay for pixel compression layer + Delay = Delay + 16 * dispclk_per_dscclk; + + // dscc - input deserializer + Delay = Delay + 5; + + // dscc - input cdc fifo + Delay = Delay + 1 + 4 * dispclk_per_dscclk; + + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } else if (pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) { + //dscc top delay for pixel compression layer + Delay = Delay + 16 * dispclk_per_dscclk; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 1 + 4 * dispclk_per_dscclk; + + + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } else if (pixelFormat == dml2_s422) { + //dscc top delay for pixel compression layer + Delay = Delay + 17 * dispclk_per_dscclk; + + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 1 + 4 * dispclk_per_dscclk; + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } else { + //dscc top delay for pixel compression layer + Delay = Delay + 16 * dispclk_per_dscclk; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 1 + 4 * dispclk_per_dscclk; + // dscc - output cdc fifo + Delay = Delay + 3 + 1 * dispclk_per_dscclk; + + // dscc - cdc uncertainty + Delay = Delay + 3 + 3 * dispclk_per_dscclk; + } + + // sft + Delay = Delay + 1; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: 
pixelFormat = %u\n", __func__, pixelFormat); + dml2_printf("DML::%s: Delay = %u\n", __func__, Delay); +#endif + + return Delay; +} + +static unsigned int CalculateHostVMDynamicLevels( + bool GPUVMEnable, + bool HostVMEnable, + unsigned int HostVMMinPageSize, + unsigned int HostVMMaxNonCachedPageTableLevels) +{ + unsigned int HostVMDynamicLevels = 0; + + if (GPUVMEnable && HostVMEnable) { + if (HostVMMinPageSize < 2048) + HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; + else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) + HostVMDynamicLevels = (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - 1); + else + HostVMDynamicLevels = (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - 2); + } else { + HostVMDynamicLevels = 0; + } + return HostVMDynamicLevels; +} + +static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p) +{ + unsigned int extra_dpde_bytes; + unsigned int extra_mpde_bytes; + unsigned int MacroTileSizeBytes; + unsigned int vp_height_dpte_ub; + + unsigned int meta_surface_bytes; + unsigned int vm_bytes; + unsigned int vp_height_meta_ub; + + *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes; + *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes; + if (p->SurfaceTiling == dml2_sw_linear) { + *p->meta_row_height = 32; + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth)); + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways? + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + *p->meta_row_height = *p->MetaRequestHeight; + if (p->ViewportStationary && p->NumberOfDPPs == 1) { + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth)); + } else { + *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth); + } + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); + } else { + *p->meta_row_height = *p->MetaRequestWidth; + if (p->ViewportStationary && p->NumberOfDPPs == 1) { + *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight)); + } else { + *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight); + } + *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0); + } + + if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) { + vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes)); + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes); + } else { + vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes); + } + + 
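+ // Illustrative sketch (assumed values, not part of the original patch): DCC metadata is sized at roughly one
+ // byte per 256 bytes of surface data, so with DCCMetaPitch = 3840, vp_height_meta_ub = 2176 and
+ // BytePerPixel = 4 the expression below evaluates to 3840 * 2176 * 4 / 256 = 130560 bytes of metadata.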
meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch); + dml2_printf("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes); +#endif + if (p->GPUVMEnable == true) { + double meta_vmpg_bytes = 4.0 * 1024.0; + *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double)(meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64); + extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1); + } else { + *p->meta_pte_bytes_per_frame_ub = 0; + extra_mpde_bytes = 0; + } + + if (!p->DCCEnable || !p->mrq_present) { + *p->meta_pte_bytes_per_frame_ub = 0; + extra_mpde_bytes = 0; + *p->meta_row_bytes = 0; + } + + if (!p->GPUVMEnable) { + *p->PixelPTEBytesPerRow = 0; + *p->PixelPTEBytesPerRowStorage = 0; + *p->dpte_row_width_ub = 0; + *p->dpte_row_height = 0; + *p->dpte_row_height_linear = 0; + *p->PixelPTEBytesPerRow_one_row_per_frame = 0; + *p->dpte_row_width_ub_one_row_per_frame = 0; + *p->dpte_row_height_one_row_per_frame = 0; + *p->vmpg_width = 0; + *p->vmpg_height = 0; + *p->PixelPTEReqWidth = 0; + *p->PixelPTEReqHeight = 0; + *p->PTERequestSize = 0; + *p->dpde0_bytes_per_frame_ub = 0; + return 0; + } + + MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight; + + if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) { + vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight)); + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight); + } else { + vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight); + } + + if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) { + *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1)); + extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2); + } else { + *p->dpde0_bytes_per_frame_ub = 0; + extra_dpde_bytes = 0; + } + + vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable); + dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + dml2_printf("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear); + dml2_printf("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel); + dml2_printf("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels); + dml2_printf("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes); + dml2_printf("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes); + dml2_printf("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight); + dml2_printf("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth); + dml2_printf("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub); + dml2_printf("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub); + dml2_printf("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes); + 
dml2_printf("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes); + dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + dml2_printf("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight); + dml2_printf("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth); + dml2_printf("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); +#endif + + unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this + + if (p->SurfaceTiling == dml2_sw_linear) { + *p->PixelPTEReqHeight = 1; + *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel; + PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel; + *p->PTERequestSize = 64; + + *p->vmpg_height = 1; + *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel; + } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE + *p->PixelPTEReqHeight = p->MacroTileHeight; + *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + *p->PTERequestSize = 64; + + *p->vmpg_height = p->MacroTileHeight; + *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + + } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile + // one 64KB tile, is 16x16x256B req + *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes; + *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes; + *p->PTERequestSize = 128; + + *p->vmpg_height = *p->PixelPTEReqHeight; + *p->vmpg_width = *p->PixelPTEReqWidth; + } else { + // default for rest of calculation to go through, when vm is disable, the calulated pte related values shouldnt be used anyways + *p->PixelPTEReqHeight = p->MacroTileHeight; + *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + *p->PTERequestSize = 64; + + *p->vmpg_height = p->MacroTileHeight; + *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); + + if (p->GPUVMEnable == true) { + dml2_printf("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n", + __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling)); + DML2_ASSERT(0); + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + dml2_printf("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight); + dml2_printf("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth); + dml2_printf("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); + dml2_printf("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize); + dml2_printf("DML::%s: Pitch = %u\n", __func__, p->Pitch); + dml2_printf("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width); + dml2_printf("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height); +#endif + + *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub; + *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth); + *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize); + 
+ if (p->SurfaceTiling == dml2_sw_linear) { + *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1)))); + *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth); + *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize); + *p->dpte_row_height_linear = 0; + + // VBA_DELTA, VBA doesn't have programming value for pte row height linear. + *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1); + if (*p->dpte_row_height_linear > 128) + *p->dpte_row_height_linear = 128; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub); +#endif + + } else if (!dml_is_vertical_rotation(p->RotationAngle)) { + *p->dpte_row_height = *p->PixelPTEReqHeight; + + if (p->GPUVMMinPageSizeKBytes > 64) { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth); + } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) { + *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth)); + } else { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth); + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub); +#endif + + *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize; + } else { + *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth)); + + if (p->ViewportStationary && (p->NumberOfDPPs == 1)) { + *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight)); + } else { + *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight); + } + + *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub); +#endif + } + + if (p->GPUVMEnable != true) { + *p->PixelPTEBytesPerRow = 0; + *p->PixelPTEBytesPerRow_one_row_per_frame = 0; + } + + *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + dml2_printf("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); + dml2_printf("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); + dml2_printf("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); + dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow); + dml2_printf("DML::%s: 
PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); + dml2_printf("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); + dml2_printf("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); + dml2_printf("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); + dml2_printf("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame); +#endif + + return vm_bytes; +} // CalculateVMAndRowBytes + +static unsigned int CalculatePrefetchSourceLines( + double VRatio, + unsigned int VTaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + enum dml2_rotation_angle RotationAngle, + bool mirrored, + bool ViewportStationary, + unsigned int SwathWidth, + unsigned int ViewportHeight, + unsigned int ViewportXStart, + unsigned int ViewportYStart, + + // Output + unsigned int *VInitPreFill, + unsigned int *MaxNumSwath) +{ + + unsigned int vp_start_rot = 0; + unsigned int sw0_tmp = 0; + unsigned int MaxPartialSwath = 0; + double numLines = 0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); + dml2_printf("DML::%s: VTaps = %u\n", __func__, VTaps); + dml2_printf("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); + dml2_printf("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); + dml2_printf("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); + dml2_printf("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); +#endif + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1)); + else + *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 
1 : 0) * 0.5 * VRatio) / 2.0, 1)); + + if (ViewportStationary) { + if (RotationAngle == dml2_rotation_180) { + vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); + } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) { + vp_start_rot = ViewportXStart; + } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) { + vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); + } else { + vp_start_rot = ViewportYStart; + } + sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); + if (sw0_tmp < *VInitPreFill) { + *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1); + } else { + *MaxNumSwath = 1; + } + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight)); + } else { + *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1); + if (*VInitPreFill > 1) { + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight)); + } else { + MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight)); + } + } + numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); + dml2_printf("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); + dml2_printf("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); + dml2_printf("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); + dml2_printf("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); +#endif + return (unsigned int)(numLines); + +} + +static void CalculateRowBandwidth( + bool GPUVMEnable, + bool use_one_row_for_frame, + enum dml2_source_format_class SourcePixelFormat, + double VRatio, + double VRatioChroma, + bool DCCEnable, + double LineTime, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + + bool mrq_present, + unsigned int meta_row_bytes_per_row_ub_l, + unsigned int meta_row_bytes_per_row_ub_c, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + + // Output + double *dpte_row_bw, + double *meta_row_bw) +{ + if (!DCCEnable || !mrq_present) { + *meta_row_bw = 0; + } else if (dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { + *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime) + + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } +} + +static void CalculateMALLUseForStaticScreen( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + unsigned int SurfaceSizeInMALL[], + bool 
one_row_per_frame_fits_in_buffer[], + + // Output + bool is_using_mall_for_ss[]) +{ + + unsigned int SurfaceToAddToMALL; + bool CanAddAnotherSurfaceToMALL; + unsigned int TotalSurfaceSizeInMALL; + + TotalSurfaceSizeInMALL = 0; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable); + if (is_using_mall_for_ss[k]) + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); + dml2_printf("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); +#endif + } + + SurfaceToAddToMALL = 0; + CanAddAnotherSurfaceToMALL = true; + while (CanAddAnotherSurfaceToMALL) { + CanAddAnotherSurfaceToMALL = false; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 && + !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] && + (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { + CanAddAnotherSurfaceToMALL = true; + SurfaceToAddToMALL = k; + dml2_printf("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); + } + } + if (CanAddAnotherSurfaceToMALL) { + is_using_mall_for_ss[SurfaceToAddToMALL] = true; + TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); + dml2_printf("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); +#endif + } + } +} + +static void CalculateDCCConfiguration( + bool DCCEnabled, + bool DCCProgrammingAssumesScanDirectionUnknown, + enum dml2_source_format_class SourcePixelFormat, + unsigned int SurfaceWidthLuma, + unsigned int SurfaceWidthChroma, + unsigned int SurfaceHeightLuma, + unsigned int SurfaceHeightChroma, + unsigned int nomDETInKByte, + unsigned int RequestHeight256ByteLuma, + unsigned int RequestHeight256ByteChroma, + enum dml2_swizzle_mode TilingFormat, + unsigned int BytePerPixelY, + unsigned int BytePerPixelC, + double BytePerPixelDETY, + double BytePerPixelDETC, + enum dml2_rotation_angle RotationAngle, + + // Output + enum dml2_core_internal_request_type *RequestLuma, + enum dml2_core_internal_request_type *RequestChroma, + unsigned int *MaxUncompressedBlockLuma, + unsigned int *MaxUncompressedBlockChroma, + unsigned int *MaxCompressedBlockLuma, + unsigned int *MaxCompressedBlockChroma, + unsigned int *IndependentBlockLuma, + unsigned int *IndependentBlockChroma) +{ + unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; + + unsigned int yuv420; + unsigned int horz_div_l; + unsigned int horz_div_c; + unsigned int vert_div_l; + unsigned int vert_div_c; + + unsigned int swath_buf_size; + double detile_buf_vp_horz_limit; + double detile_buf_vp_vert_limit; + + yuv420 = dml2_core_shared_is_420(SourcePixelFormat); + horz_div_l = 1; + horz_div_c = 1; + vert_div_l = 1; + vert_div_c = 1; + + if (BytePerPixelY == 1) + vert_div_l = 0; + if (BytePerPixelC == 1) + vert_div_c = 0; + + if (BytePerPixelC == 0) { + swath_buf_size = DETBufferSizeForDCC / 
2 - 2 * 256; + detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); + detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); + } else { + swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; + detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); + detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); + } + + if (SourcePixelFormat == dml2_420_10) { + detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; + detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; + } + + detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16); + detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16); + + unsigned int MAS_vp_horz_limit; + unsigned int MAS_vp_vert_limit; + unsigned int max_vp_horz_width; + unsigned int max_vp_vert_height; + unsigned int eff_surf_width_l; + unsigned int eff_surf_width_c; + unsigned int eff_surf_height_l; + unsigned int eff_surf_height_c; + + unsigned int full_swath_bytes_horz_wc_l; + unsigned int full_swath_bytes_horz_wc_c; + unsigned int full_swath_bytes_vert_wc_l; + unsigned int full_swath_bytes_vert_wc_c; + + MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144; + MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); + max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit)); + max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit)); + eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); + eff_surf_width_c = eff_surf_width_l / (1 + yuv420); + eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? 
max_vp_vert_height : SurfaceHeightLuma); + eff_surf_height_c = eff_surf_height_l / (1 + yuv420); + + full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; + full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; + if (BytePerPixelC > 0) { + full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; + full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; + } else { + full_swath_bytes_horz_wc_c = 0; + full_swath_bytes_vert_wc_c = 0; + } + + if (SourcePixelFormat == dml2_420_10) { + full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0)); + full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0)); + } + + unsigned int req128_horz_wc_l; + unsigned int req128_horz_wc_c; + unsigned int req128_vert_wc_l; + unsigned int req128_vert_wc_c; + + if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 0; + } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 0; + req128_horz_wc_c = 1; + } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { + req128_horz_wc_l = 1; + req128_horz_wc_c = 0; + } else { + req128_horz_wc_l = 1; + req128_horz_wc_c = 1; + } + + if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 0; + } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 0; + req128_vert_wc_c = 1; + } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { + req128_vert_wc_l = 1; + req128_vert_wc_c = 0; + } else { + req128_vert_wc_l = 1; + req128_vert_wc_c = 1; + } + + unsigned int segment_order_horz_contiguous_luma; + unsigned int segment_order_horz_contiguous_chroma; + unsigned int segment_order_vert_contiguous_luma; + unsigned int segment_order_vert_contiguous_chroma; + + if (BytePerPixelY == 2) { + segment_order_horz_contiguous_luma = 0; + segment_order_vert_contiguous_luma = 1; + } else { + segment_order_horz_contiguous_luma = 1; + segment_order_vert_contiguous_luma = 0; + } + + if (BytePerPixelC == 2) { + segment_order_horz_contiguous_chroma = 0; + segment_order_vert_contiguous_chroma = 1; + } else { + segment_order_horz_contiguous_chroma = 1; + segment_order_vert_contiguous_chroma = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); + dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + dml2_printf("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); + dml2_printf("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); + dml2_printf("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); + dml2_printf("DML::%s: 
full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); + dml2_printf("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); + dml2_printf("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); + dml2_printf("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); +#endif + if (DCCProgrammingAssumesScanDirectionUnknown == true) { + if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } else if (!dml_is_vertical_rotation(RotationAngle)) { + if (req128_horz_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_horz_contiguous_luma == 0) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_horz_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_horz_contiguous_chroma == 0) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } else { + if (req128_vert_wc_l == 0) { + *RequestLuma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_vert_contiguous_luma == 0) { + *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous; + } + if (req128_vert_wc_c == 0) { + *RequestChroma = dml2_core_internal_request_type_256_bytes; + } else if (segment_order_vert_contiguous_chroma == 0) { + *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous; + } else { + *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous; + } + } + + if (*RequestLuma == dml2_core_internal_request_type_256_bytes) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 256; + *IndependentBlockLuma = 0; + } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 128; + *IndependentBlockLuma = 128; + } else { + *MaxUncompressedBlockLuma = 256; + *MaxCompressedBlockLuma = 64; + *IndependentBlockLuma = 64; + } + + if (*RequestChroma == dml2_core_internal_request_type_256_bytes) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 256; + *IndependentBlockChroma = 0; + } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) { + *MaxUncompressedBlockChroma = 256; + *MaxCompressedBlockChroma = 128; + *IndependentBlockChroma = 128; + } else { + *MaxUncompressedBlockChroma = 256; + 
*MaxCompressedBlockChroma = 64; + *IndependentBlockChroma = 64; + } + + if (DCCEnabled != true || BytePerPixelC == 0) { + *MaxUncompressedBlockChroma = 0; + *MaxCompressedBlockChroma = 0; + *IndependentBlockChroma = 0; + } + + if (DCCEnabled != true) { + *MaxUncompressedBlockLuma = 0; + *MaxCompressedBlockLuma = 0; + *IndependentBlockLuma = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); + dml2_printf("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); + dml2_printf("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); + dml2_printf("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); + dml2_printf("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); + dml2_printf("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); +#endif + +} + +static void calculate_mcache_row_bytes( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_calculate_mcache_row_bytes_params *p) +{ + unsigned int vmpg_bytes = 0; + unsigned int blk_bytes = 0; + float meta_per_mvmpg_per_channel = 0; + unsigned int est_blk_per_vmpg = 2; + unsigned int mvmpg_per_row_ub = 0; + unsigned int full_vp_width_mvmpg_aligned = 0; + unsigned int full_vp_height_mvmpg_aligned = 0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: num_chans = %u\n", __func__, p->num_chans); + dml2_printf("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes); + dml2_printf("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes); + dml2_printf("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes); + dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); + dml2_printf("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes); + dml2_printf("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary); + dml2_printf("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode); + dml2_printf("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x); + dml2_printf("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y); + dml2_printf("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width); + dml2_printf("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height); + dml2_printf("DML::%s: blk_width = %u\n", __func__, p->blk_width); + dml2_printf("DML::%s: blk_height = %u\n", __func__, p->blk_height); + dml2_printf("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width); + dml2_printf("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height); + dml2_printf("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes); +#endif + DML2_ASSERT(p->mcache_line_size_bytes != 0); + DML2_ASSERT(p->mcache_size_bytes != 0); + + *p->mvmpg_width = 0; + *p->mvmpg_height = 0; + + if (p->full_vp_height == 0 && p->full_vp_width == 0) { + *p->num_mcaches = 0; + *p->mcache_row_bytes = 0; + } else { + blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode); + + // if gpuvm is not enable, the alignment boundary should be in terms of tiling block size + vmpg_bytes = p->gpuvm_page_size_kbytes * 1024; + + //With vmpg_bytes >= tile blk_bytes, the meta_row_width alignment equations are relative to the vmpg_width/height. + // But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block. + // Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end. 
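+ // Illustration: the only supported mismatch handled below is a 64 KB tile block with a 4 KB GPU VM page, i.e. 65536 / 4096 = 16 vm pages per tile block; that is the factor of 16 applied to meta_per_mvmpg_per_channel_ub near the end of this function.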
+ *p->mvmpg_width = p->blk_width; + *p->mvmpg_height = p->blk_height; + if (p->gpuvm_enable) { + if (vmpg_bytes >= blk_bytes) { + *p->mvmpg_width = p->vmpg_width; + *p->mvmpg_height = p->vmpg_height; + } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) { + dml2_printf("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__); + DML2_ASSERT(0); + } + } + + //For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c + full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width)); + full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height)); + + *p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned; + + //Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes. + if (!p->surf_vert) { //horizontal access + if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes) + *p->meta_row_width_ub = full_vp_width_mvmpg_aligned; + else + *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width; + mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width; + } else { //vertical access + if (p->vp_stationary == 1) + *p->meta_row_width_ub = full_vp_height_mvmpg_aligned; + else + *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height; + mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height; + } + + unsigned int meta_per_mvmpg_per_channel_ub = 0; + + if (p->gpuvm_enable) { + meta_per_mvmpg_per_channel = (float)vmpg_bytes / 256 / p->num_chans; + + //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic + if (p->surf_vert && vmpg_bytes > blk_bytes) { + meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / 256 / p->num_chans; + } + + *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom + } else { + meta_per_mvmpg_per_channel = (float)blk_bytes / 256 / p->num_chans; + + if (!p->surf_vert) + *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; + else + *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); + } + + meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes); + + //but for 4KB vmpg with 64KB tile blk + if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096)) + meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub; + + // If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes, + // then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes. 
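+ // Sketch of the sizing that follows: in the gpuvm/horizontal path, mcache_row_bytes = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub (per-channel meta bytes for one meta row), and num_mcaches = ceil(mcache_row_bytes / mcache_size_bytes), so a meta row that fits within mcache_size_bytes needs exactly one mcache.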
+ if (p->gpuvm_enable || !p->surf_vert) { + *p->mcache_row_bytes = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub; + } else { // horizontal and gpuvm disable + *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256; + *p->mcache_row_bytes = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes); + } + + *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref + *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->mcache_size_bytes, 1); + + unsigned int mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub; + *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1); + + DML2_ASSERT(*p->num_mcaches > 0); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); + dml2_printf("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes); + dml2_printf("DML::%s: blk_bytes = %u\n", __func__, blk_bytes); + dml2_printf("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel); + dml2_printf("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub); + dml2_printf("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub); + dml2_printf("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width); + dml2_printf("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height); + dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); + dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor); + dml2_printf("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor); +#endif + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes); + dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); +#endif +} + +static void calculate_mcache_setting( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_calculate_mcache_setting_params *p) +{ + unsigned int n; + + struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals; + memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals)); + + *p->num_mcaches_l = 0; + *p->mcache_row_bytes_l = 0; + *p->dcc_dram_bw_nom_overhead_factor_l = 1.0; + *p->dcc_dram_bw_pref_overhead_factor_l = 1.0; + + *p->num_mcaches_c = 0; + *p->mcache_row_bytes_c = 0; + *p->dcc_dram_bw_nom_overhead_factor_c = 1.0; + *p->dcc_dram_bw_pref_overhead_factor_c = 1.0; + + *p->mall_comb_mcache_l = 0; + *p->mall_comb_mcache_c = 0; + *p->lc_comb_mcache = 0; + + if (!p->dcc_enable) + return; + + l->is_dual_plane = dml2_core_shared_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha; + + l->l_p.num_chans = p->num_chans; + l->l_p.mem_word_bytes = p->mem_word_bytes; + l->l_p.mcache_size_bytes = p->mcache_size_bytes; + l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes; + l->l_p.gpuvm_enable = p->gpuvm_enable; + l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes; + l->l_p.surf_vert = p->surf_vert; + l->l_p.vp_stationary = p->vp_stationary; + l->l_p.tiling_mode = p->tiling_mode; + l->l_p.vp_start_x = p->vp_start_x_l; + l->l_p.vp_start_y = p->vp_start_y_l; + l->l_p.full_vp_width = p->full_vp_width_l; + l->l_p.full_vp_height = p->full_vp_height_l; + l->l_p.blk_width = p->blk_width_l; + l->l_p.blk_height = p->blk_height_l; + l->l_p.vmpg_width = 
p->vmpg_width_l; + l->l_p.vmpg_height = p->vmpg_height_l; + l->l_p.full_swath_bytes = p->full_swath_bytes_l; + l->l_p.bytes_per_pixel = p->bytes_per_pixel_l; + + // output + l->l_p.num_mcaches = p->num_mcaches_l; + l->l_p.mcache_row_bytes = p->mcache_row_bytes_l; + l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l; + l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l; + l->l_p.mvmpg_width = &l->mvmpg_width_l; + l->l_p.mvmpg_height = &l->mvmpg_height_l; + l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l; + l->l_p.meta_row_width_ub = &l->meta_row_width_l; + l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l; + + calculate_mcache_row_bytes(scratch, &l->l_p); + dml2_assert(*p->num_mcaches_l > 0); + + if (l->is_dual_plane) { + l->c_p.num_chans = p->num_chans; + l->c_p.mem_word_bytes = p->mem_word_bytes; + l->c_p.mcache_size_bytes = p->mcache_size_bytes; + l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes; + l->c_p.gpuvm_enable = p->gpuvm_enable; + l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes; + l->c_p.surf_vert = p->surf_vert; + l->c_p.vp_stationary = p->vp_stationary; + l->c_p.tiling_mode = p->tiling_mode; + l->c_p.vp_start_x = p->vp_start_x_c; + l->c_p.vp_start_y = p->vp_start_y_c; + l->c_p.full_vp_width = p->full_vp_width_c; + l->c_p.full_vp_height = p->full_vp_height_c; + l->c_p.blk_width = p->blk_width_c; + l->c_p.blk_height = p->blk_height_c; + l->c_p.vmpg_width = p->vmpg_width_c; + l->c_p.vmpg_height = p->vmpg_height_c; + l->c_p.full_swath_bytes = p->full_swath_bytes_c; + l->c_p.bytes_per_pixel = p->bytes_per_pixel_c; + + // output + l->c_p.num_mcaches = p->num_mcaches_c; + l->c_p.mcache_row_bytes = p->mcache_row_bytes_c; + l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c; + l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c; + l->c_p.mvmpg_width = &l->mvmpg_width_c; + l->c_p.mvmpg_height = &l->mvmpg_height_c; + l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c; + l->c_p.meta_row_width_ub = &l->meta_row_width_c; + l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c; + + calculate_mcache_row_bytes(scratch, &l->c_p); + dml2_assert(*p->num_mcaches_c > 0); + } + + // Sharing for iMALL access + l->mcache_remainder_l = *p->mcache_row_bytes_l % p->mcache_size_bytes; + l->mcache_remainder_c = *p->mcache_row_bytes_c % p->mcache_size_bytes; + l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l; + l->mvmpg_access_width_c = p->surf_vert ? l->mvmpg_height_c : l->mvmpg_width_c; + + if (p->imall_enable) { + *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes); + + if (l->is_dual_plane) + *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes); + } + + if (!p->surf_vert) // horizonatal access + l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2; + else // vertical access + l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2; + + // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c: + l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l; + if (l->is_dual_plane) { + l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c; + + if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) { + l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 
2 : 1) * l->luma_time_factor) + + (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1))); + } + *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: imall_enable = %u\n", __func__, p->imall_enable); + dml2_printf("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane); + dml2_printf("DML::%s: surf_vert = %u\n", __func__, p->surf_vert); + dml2_printf("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l); + dml2_printf("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l); + dml2_printf("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l); + dml2_printf("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l); + dml2_printf("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l); + dml2_printf("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l); + dml2_printf("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l); + + if (l->is_dual_plane) { + dml2_printf("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c); + dml2_printf("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c); + dml2_printf("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c); + dml2_printf("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor); + dml2_printf("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c); + dml2_printf("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c); + dml2_printf("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c); + dml2_printf("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c); + dml2_printf("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size); + dml2_printf("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache); + } +#endif + // calculate split_coordinate + l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l; + l->full_vp_access_width_c = p->surf_vert ? 
p->full_vp_height_c : p->full_vp_width_c; + + for (n = 0; n < *p->num_mcaches_l - 1; n++) { + p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l; + } + p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l; + + if (l->is_dual_plane) { + for (n = 0; n < *p->num_mcaches_c - 1; n++) { + p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c; + } + p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c; + } +#ifdef __DML_VBA_DEBUG__ + for (n = 0; n < *p->num_mcaches_l; n++) + dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + + if (l->is_dual_plane) { + for (n = 0; n < *p->num_mcaches_c; n++) + dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); + } +#endif + + // Luma/Chroma combine in the last mcache + // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary + if (*p->lc_comb_mcache && l->is_dual_plane) { + for (n = 0; n < *p->num_mcaches_l - 1; n++) + p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l; + p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l; + + for (n = 0; n < *p->num_mcaches_c - 1; n++) + p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c; + p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c; + +#ifdef __DML_VBA_DEBUG__ + for (n = 0; n < *p->num_mcaches_l; n++) + dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + + for (n = 0; n < *p->num_mcaches_c; n++) + dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); +#endif + } + + *p->mcache_shift_granularity_l = l->mvmpg_access_width_l; + *p->mcache_shift_granularity_c = l->mvmpg_access_width_c; +} + +static void calculate_mall_bw_overhead_factor( + double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref + double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref + + // input + const struct dml2_display_cfg *display_cfg, + unsigned int num_active_planes) +{ + for (unsigned int k = 0; k < num_active_planes; ++k) { + mall_prefetch_sdp_overhead_factor[k] = 1.0; + mall_prefetch_dram_overhead_factor[k] = 1.0; + + // SDP - on the return side + if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return + mall_prefetch_sdp_overhead_factor[k] = 1.25; + else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) + mall_prefetch_sdp_overhead_factor[k] = 0.25; + + // DRAM + if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) + mall_prefetch_dram_overhead_factor[k] = 2.0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]); + dml2_printf("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]); +#endif + } +} + +static double dml_get_return_bandwidth_available( + const struct dml2_soc_bb *soc, + enum dml2_core_internal_soc_state_type state_type, + enum dml2_core_internal_bw_type bw_type, + bool 
is_avg_bw, + bool is_hvm_en, + bool is_hvm_only, + double dcflk_mhz, + double fclk_mhz, + double dram_bw_mbps) +{ + double return_bw_mbps = 0.; + double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcflk_mhz; + double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes; + double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes; + + double derate_sdp_factor = 1; + double derate_fabric_factor = 1; + double derate_dram_factor = 1; + + if (is_avg_bw) { + if (state_type == dml2_core_internal_soc_state_svp_prefetch) { + derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0; + } else { // just assume sys_active + derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0; + } + } else { // urgent bw + if (state_type == dml2_core_internal_soc_state_svp_prefetch) { + derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0; + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; + + if (is_hvm_en) { + if (is_hvm_only) + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0; + else + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0; + } else { + derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0; + } + } else { // just assume sys_active + derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0; + derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0; + + if (is_hvm_en) { + if (is_hvm_only) + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0; + else + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0; + } else { + derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0; + } + } + } + + double derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor; + double derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor; + double derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor; + + if (bw_type == dml2_core_internal_bw_sdp) + return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth); + else // dml2_core_internal_bw_dram + return_bw_mbps = derate_dram_bandwidth; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); + dml2_printf("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en); + dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); + 
dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type)); + dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); + dml2_printf("DML::%s: dcflk_mhz = %f\n", __func__, dcflk_mhz); + dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); + dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); + dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); + dml2_printf("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth); + dml2_printf("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor); + dml2_printf("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor); + dml2_printf("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor); + dml2_printf("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps); +#endif + return return_bw_mbps; +} + +static void calculate_bandwidth_available( + double avg_bandwidth_available_min[dml2_core_internal_soc_state_max], + double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max], + double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max], + + const struct dml2_soc_bb *soc, + bool HostVMEnable, + double dcfclk_mhz, + double fclk_mhz, + double dram_bw_mbps) +{ + unsigned int n, m; + + dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); + dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); + dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps); + + // Calculate all the bandwidth availabe + for (m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (n = 0; n < dml2_core_internal_bw_max; n++) { + avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, + m, // soc_state + n, // bw_type + 1, // avg_bw + HostVMEnable, + 0, // hvm_only + dcfclk_mhz, + fclk_mhz, + dram_bw_mbps); + + urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps); + + + dml2_printf("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]); + dml2_printf("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]); + + // urg_bandwidth_available_vm_only is indexed by soc_state + if (n == dml2_core_internal_bw_dram) { + urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps); + urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps); + } + } + + avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]); + urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]); 
+ +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]); + dml2_printf("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]); + dml2_printf("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]); +#endif + } +} + +static void calculate_avg_bandwidth_required( + double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + + // input + const struct dml2_display_cfg *display_cfg, + unsigned int num_active_planes, + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double cursor_bw[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double mall_prefetch_dram_overhead_factor[], + double mall_prefetch_sdp_overhead_factor[]) +{ + unsigned int n, m, k; + + // Average BW support check + for (m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram + avg_bandwidth_required[m][n] = 0; + } + } + + // SysActive and SVP Prefetch AVG bandwidth Check + for (k = 0; k < num_active_planes; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: plane %0d\n", __func__, k); + dml2_printf("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]); + dml2_printf("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]); + dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]); + dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]); + dml2_printf("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]); + dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]); +#endif + + double sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k]; + double dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k]; + double dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k]; + + // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation? 
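+ // Note: dram_overhead_factor_p0/p1 above fold the per-plane DCC nominal overhead together with the MALL prefetch DRAM overhead, so the DRAM sums below count read bandwidth inflated by both effects.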
+ // active avg bw not include phantom, but svp_prefetch avg bw should include phantom pipes + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { + avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k]; + avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k]; + } + avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k]; + avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); + dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); + dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); + dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); +#endif + } +} + +static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateVMRowAndSwath_params *p) +{ + struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals; + + s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_page_table_levels); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->hostvm_enable == true) { + p->vm_group_bytes[k] = 512; + p->dpte_group_bytes[k] = 512; + } else if (p->display_cfg->gpuvm_enable == true) { + p->vm_group_bytes[k] = 2048; + if (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes >= 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) { + p->dpte_group_bytes[k] = 512; + } else { + p->dpte_group_bytes[k] = 2048; + } + } else { + p->vm_group_bytes[k] = 0; + p->dpte_group_bytes[k] = 0; + } + + if (dml2_core_shared_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) { + if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) { + s->PTEBufferSizeInRequestsForLuma[k] = 
(p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2; + s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k]; + } else { + s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma; + s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma; + } + + scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; + scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; + scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; + scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC; + scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC; + scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; + scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; + scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC; + scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; + scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k]; + scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC; + scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC; + scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC; + scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; + scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; + scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k]; + scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC; + scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC; + scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC; + scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); + scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC; + scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; + + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_height = 
&p->vmpg_height_c[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k]; + scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k]; + scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k]; + + scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k]; + + s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); + + p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( + p->myPipe[k].VRatioChroma, + p->myPipe[k].VTapsChroma, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightC, + p->myPipe[k].RotationAngle, + p->myPipe[k].mirrored, + p->myPipe[k].ViewportStationary, + p->SwathWidthC[k], + p->myPipe[k].ViewportHeightC, + p->myPipe[k].ViewportXStartC, + p->myPipe[k].ViewportYStartC, + + // Output + &p->VInitPreFillC[k], + &p->MaxNumSwathC[k]); + } else { + s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma; + s->PTEBufferSizeInRequestsForChroma[k] = 0; + s->PixelPTEBytesPerRowC[k] = 0; + s->PixelPTEBytesPerRowStorageC[k] = 0; + s->vm_bytes_c = 0; + p->MaxNumSwathC[k] = 0; + p->PrefetchSourceLinesC[k] = 0; + s->dpte_row_height_chroma_one_row_per_frame[k] = 0; + s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + } + + scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary; + scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable; + scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface; + scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY; + scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY; + scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat; + scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling; + scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY; + scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle; + scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k]; + scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight; + scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart; + scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart; + scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable; + scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels; + scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = 
p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k]; + scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY; + scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthY; + scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY; + scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]); + scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY; + scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present; + + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k]; + scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k]; + scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k]; + scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k]; + scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k]; + + scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k]; + scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k]; + scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k]; + + s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params); + + p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + p->myPipe[k].VRatio, + p->myPipe[k].VTaps, + p->myPipe[k].InterlaceEnable, + p->myPipe[k].ProgressiveToInterlaceUnitInOPP, + p->myPipe[k].SwathHeightY, + p->myPipe[k].RotationAngle, + p->myPipe[k].mirrored, + p->myPipe[k].ViewportStationary, + p->SwathWidthY[k], + p->myPipe[k].ViewportHeight, + p->myPipe[k].ViewportXStart, + p->myPipe[k].ViewportYStart, + + // Output + &p->VInitPreFillY[k], + &p->MaxNumSwathY[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); + dml2_printf("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); + dml2_printf("DML::%s: k=%u, 
meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]); + dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); +#endif + p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels); + p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); + dml2_printf("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); +#endif + if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { + p->PTEBufferSizeNotExceeded[k] = true; + } else { + p->PTEBufferSizeNotExceeded[k] = false; + } + + s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] && + s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]); +#ifdef __DML_VBA_DEBUG__ + if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) { + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); + dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); + dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); + dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + + dml2_printf("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); + dml2_printf("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); + } +#endif + } + + CalculateMALLUseForStaticScreen( + p->display_cfg, + p->NumberOfActiveSurfaces, + p->MALLAllocatedForDCN, + p->SurfaceSizeInMALL, + s->one_row_per_frame_fits_in_buffer, + // Output + p->is_using_mall_for_ss); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->gpuvm_enable) { + if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) { + p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value; + } + p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || + dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64); + p->BIGK_FRAGMENT_SIZE[k] = (unsigned 
int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12); + } else { + p->PTE_BUFFER_MODE[k] = 0; + p->BIGK_FRAGMENT_SIZE[k] = 0; + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DCCMetaBufferSizeNotExceeded[k] = true; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); + dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]); +#endif + p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || + (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)); + + p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame); + + if (p->use_one_row_for_frame[k]) { + p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k]; + p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k]; + s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k]; + p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k]; + p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k]; + s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k]; + p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k]; + } + + if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) { + p->DCCMetaBufferSizeNotExceeded[k] = true; + } else { + p->DCCMetaBufferSizeNotExceeded[k] = false; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]); + dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes); + dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); +#endif + } + + s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels); + s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels); + p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k]; + + // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend like that one big row is fetched in two halfs + if (p->use_one_row_for_frame[k]) + p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2; + + CalculateRowBandwidth( + p->display_cfg->gpuvm_enable, + p->use_one_row_for_frame[k], + p->myPipe[k].SourcePixelFormat, + p->myPipe[k].VRatio, + p->myPipe[k].VRatioChroma, + p->myPipe[k].DCCEnable, + p->myPipe[k].HTotal / p->myPipe[k].PixelClock, + s->PixelPTEBytesPerRowY[k], + s->PixelPTEBytesPerRowC[k], + p->dpte_row_height_luma[k], + p->dpte_row_height_chroma[k], + + p->mrq_present, + s->meta_row_bytes_per_row_ub_l[k], + s->meta_row_bytes_per_row_ub_c[k], + p->meta_row_height_luma[k], + p->meta_row_height_chroma[k], + + // Output + &p->dpte_row_bw[k], + &p->meta_row_bw[k]); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + dml2_printf("DML::%s: k=%u, 
use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); + dml2_printf("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); + dml2_printf("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); + dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + dml2_printf("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); + dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); + dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + dml2_printf("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); + dml2_printf("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); + dml2_printf("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); +#endif + } +} + +static double CalculateUrgentLatency( + double UrgentLatencyPixelDataOnly, + double UrgentLatencyPixelMixedWithVMData, + double UrgentLatencyVMDataOnly, + bool DoUrgentLatencyAdjustment, + double UrgentLatencyAdjustmentFabricClockComponent, + double UrgentLatencyAdjustmentFabricClockReference, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int urgent_ramp_uclk_cycles, + unsigned int df_qos_response_time_fclk_cycles, + unsigned int max_round_trip_to_furthest_cs_fclk_cycles, + unsigned int mall_overhead_fclk_cycles, + double umc_urgent_ramp_latency_margin, + double fabric_max_transport_latency_margin) +{ + double urgent_latency = 0; + if (qos_type == dml2_qos_param_type_dcn4) { + urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock + + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) + + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); + } else { + urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); + if (DoUrgentLatencyAdjustment == true) { + urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); + } + } +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4) { + dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); + dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); + dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + dml2_printf("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); + } else { + dml2_printf("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); + dml2_printf("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); + dml2_printf("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly); + dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockComponent 
= %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); + dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); + } + dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock); + dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); +#endif + return urgent_latency; +} + +static double CalculateTripToMemory( + double UrgLatency, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int trip_to_memory_uclk_cycles, + unsigned int max_round_trip_to_furthest_cs_fclk_cycles, + unsigned int mall_overhead_fclk_cycles, + double umc_max_latency_margin, + double fabric_max_transport_latency_margin) +{ + double trip_to_memory_us; + if (qos_type == dml2_qos_param_type_dcn4) { + trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock + + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); + } else { + trip_to_memory_us = UrgLatency; + } + +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4) { + dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); + dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); + dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); + dml2_printf("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); + dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock); + dml2_printf("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); + dml2_printf("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); + } else { + dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + } + dml2_printf("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); +#endif + + + return trip_to_memory_us; +} + +static double CalculateMetaTripToMemory( + double UrgLatency, + double FabricClock, + double uclk_freq_mhz, + enum dml2_qos_param_type qos_type, + unsigned int meta_trip_to_memory_uclk_cycles, + unsigned int meta_trip_to_memory_fclk_cycles, + double umc_max_latency_margin, + double fabric_max_transport_latency_margin) +{ + double meta_trip_to_memory_us; + if (qos_type == dml2_qos_param_type_dcn4) { + meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); + } else { + meta_trip_to_memory_us = UrgLatency; + } + +#ifdef __DML_VBA_DEBUG__ + if (qos_type == dml2_qos_param_type_dcn4) { + dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); + dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); + dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); + dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + } else { + dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + } + dml2_printf("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); +#endif + + + return meta_trip_to_memory_us; +} + +static void calculate_cursor_req_attributes( + unsigned int 
cursor_width, + unsigned int cursor_bpp, + + // output + unsigned int *cursor_lines_per_chunk, + unsigned int *cursor_bytes_per_line, + unsigned int *cursor_bytes_per_chunk, + unsigned int *cursor_bytes) +{ + unsigned int cursor_pitch = 0; + unsigned int cursor_bytes_per_req = 0; + unsigned int cursor_width_bytes = 0; + unsigned int cursor_height = 0; + + //SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply. + //- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B + //- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width + if (cursor_bpp == 2) + cursor_pitch = 256; + else + cursor_pitch = (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1); + + //The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows. + + cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1); + if (cursor_width_bytes <= 64) + cursor_bytes_per_req = 64; + else if (cursor_width_bytes <= 128) + cursor_bytes_per_req = 128; + else + cursor_bytes_per_req = 256; + + //If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line. + *cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req); + + //Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines. + if (cursor_bpp == 2) { + *cursor_lines_per_chunk = 16; + } else if (cursor_bpp == 32) { + if (cursor_width <= 32) + *cursor_lines_per_chunk = 16; + else if (cursor_width <= 64) + *cursor_lines_per_chunk = 8; + else if (cursor_width <= 128) + *cursor_lines_per_chunk = 4; + else + *cursor_lines_per_chunk = 2; + } else if (cursor_bpp == 64) { + if (cursor_width <= 16) + *cursor_lines_per_chunk = 16; + else if (cursor_width <= 32) + *cursor_lines_per_chunk = 8; + else if (cursor_width <= 64) + *cursor_lines_per_chunk = 4; + else if (cursor_width <= 128) + *cursor_lines_per_chunk = 2; + else + *cursor_lines_per_chunk = 1; + } else { + if (cursor_width > 0) { + dml2_printf("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); + dml2_assert(0); + } + } + + *cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk; + + // For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize. 
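// Illustrative sizing example (cursor dimensions assumed for illustration only): a 64x64, 32bpp cursor
// traces through the rules above as
//   cursor_width_bytes     = ceil(64 * 32 / 8) = 256   -> cursor_bytes_per_req = 256
//   cursor_bytes_per_line  = 256 rounded up to a multiple of 256 = 256
//   cursor_lines_per_chunk = 8   (32bpp, width <= 64)
//   cursor_bytes_per_chunk = 256 * 8 = 2048   (a 2KB chunk, consistent with the note above)
//   cursor_bytes           = 256 * 64 = 16384 (square worst case, computed below)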
+ // Only cursor_width is provided for worst case sizing so assume that the cursor is square + cursor_height = cursor_width; + *cursor_bytes = *cursor_bytes_per_line * cursor_height; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); + dml2_printf("DML::%s: cursor_width = %d\n", __func__, cursor_width); + dml2_printf("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); + dml2_printf("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); + dml2_printf("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); + dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); + dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); + dml2_printf("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); + dml2_printf("DML::%s: cursor_pitch = %d\n", __func__, cursor_pitch); +#endif + + // register CURSOR_PITCH = math_log2(cursor_pitch) - 6; + // register CURSOR_LINES_PER_CHUNK = math_log2(*cursor_lines_per_chunk); +} + +static void calculate_cursor_urgent_burst_factor( + unsigned int CursorBufferSize, + unsigned int CursorWidth, + unsigned int cursor_bytes_per_chunk, + unsigned int cursor_lines_per_chunk, + double LineTime, + double UrgentLatency, + + double *UrgentBurstFactorCursor, + bool *NotEnoughUrgentLatencyHiding) +{ + unsigned int LinesInCursorBuffer = 0; + double CursorBufferSizeInTime = 0; + + if (CursorWidth > 0) { + LinesInCursorBuffer = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1) * cursor_lines_per_chunk; + + CursorBufferSizeInTime = LinesInCursorBuffer * LineTime; + if (CursorBufferSizeInTime - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorCursor = 0; + } else { + *NotEnoughUrgentLatencyHiding = 0; + *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); + dml2_printf("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); + dml2_printf("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); + dml2_printf("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); + dml2_printf("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); + dml2_printf("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); + dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); +#endif + + } +} + +static void CalculateUrgentBurstFactor( + const struct dml2_plane_parameters *plane_cfg, + unsigned int swath_width_luma_ub, + unsigned int swath_width_chroma_ub, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double LineTime, + double UrgentLatency, + double VRatio, + double VRatioC, + double BytePerPixelInDETY, + double BytePerPixelInDETC, + unsigned int DETBufferSizeY, + unsigned int DETBufferSizeC, + // Output + double *UrgentBurstFactorLuma, + double *UrgentBurstFactorChroma, + bool *NotEnoughUrgentLatencyHiding) +{ + double LinesInDETLuma; + double LinesInDETChroma; + double DETBufferSizeInTimeLuma; + double DETBufferSizeInTimeChroma; + + *NotEnoughUrgentLatencyHiding = 0; + *UrgentBurstFactorLuma = 0; + *UrgentBurstFactorChroma = 0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); + dml2_printf("DML::%s: VRatioC = %f\n", 
__func__, VRatioC); + dml2_printf("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); + dml2_printf("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); + dml2_printf("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); + dml2_printf("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); + dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime); +#endif + DML2_ASSERT(VRatio > 0); + + LinesInDETLuma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; + + DETBufferSizeInTimeLuma = math_floor2(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; + if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorLuma = 0; + } else { + *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); + } + + if (BytePerPixelInDETC > 0) { + LinesInDETChroma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub; + + DETBufferSizeInTimeChroma = math_floor2(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC; + if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { + *NotEnoughUrgentLatencyHiding = 1; + *UrgentBurstFactorChroma = 0; + } else { + *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); + dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + dml2_printf("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); + dml2_printf("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); + dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); + dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); +#endif + +} + +static void CalculateDCFCLKDeepSleep( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelY[], + unsigned int BytePerPixelC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + unsigned int DPPPerSurface[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + unsigned int ReturnBusWidth, + + // Output + double *DCFClkDeepSleep) +{ + double DisplayPipeLineDeliveryTimeLuma; + double DisplayPipeLineDeliveryTimeChroma; + double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES]; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz; + } else { + DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma = 0; + } else { + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) { + DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz; + } else { + DisplayPipeLineDeliveryTimeChroma 
= SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } + + if (BytePerPixelC[k] > 0) { + DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, + __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); + } else { + DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; + } + DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); + dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); +#endif + } + + double ReadBandwidth = 0.0; + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; + } + + *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); + dml2_printf("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); + dml2_printf("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); + dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); +#endif + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); + } + dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); +} + +static double CalculateWriteBackDelay( + enum dml2_source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackVTaps, + unsigned int WritebackDestinationWidth, + unsigned int WritebackDestinationHeight, + unsigned int WritebackSourceHeight, + unsigned int HTotal) +{ + double CalculateWriteBackDelay; + double Line_length; + double Output_lines_last_notclamped; + double WritebackVInit; + + WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; + Line_length = math_max2((double)WritebackDestinationWidth, math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); + Output_lines_last_notclamped = WritebackDestinationHeight - 1 - math_ceil2(((double)WritebackSourceHeight - (double)WritebackVInit) / (double)WritebackVRatio, 1.0); + if (Output_lines_last_notclamped < 0) { + CalculateWriteBackDelay = 0; + } else { + CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; + } + return CalculateWriteBackDelay; +} + +static unsigned int CalculateMaxVStartup( + bool ptoi_supported, + unsigned int vblank_nom_default_us, + const struct dml2_timing_cfg *timing, + double write_back_delay_us) +{ + unsigned int vblank_size = 0; + unsigned int max_vstartup_lines = 0; + + double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000); + unsigned int vblank_actual = timing->v_total - timing->v_active; + unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0); + unsigned int vblank_nom_input = (unsigned int)math_min2(timing->vblank_nom, vblank_nom_default_in_line); + unsigned int vblank_avail = (vblank_nom_input == 0) ? 
vblank_nom_default_in_line : vblank_nom_input; + + vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail); + + if (timing->interlaced && !ptoi_supported) + max_vstartup_lines = (unsigned int)(math_floor2(vblank_size / 2.0, 1.0)); + else + max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0)); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VBlankNom = %u\n", __func__, timing->vblank_nom); + dml2_printf("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); + dml2_printf("DML::%s: line_time_us = %f\n", __func__, line_time_us); + dml2_printf("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); + dml2_printf("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); + dml2_printf("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); +#endif + return max_vstartup_lines; +} + +static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p) +{ + struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals *l = &scratch->CalculateSwathAndDETConfiguration_locals; + memset(l, 0, sizeof(struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + dml2_printf("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); + } +#endif + CalculateSwathWidth( + p->display_cfg, + p->ForceSingleDPP, + p->NumberOfActiveSurfaces, + p->ODMMode, + p->BytePerPixY, + p->BytePerPixC, + p->Read256BytesBlockHeightY, + p->Read256BytesBlockHeightC, + p->Read256BytesBlockWidthY, + p->Read256BytesBlockWidthC, + p->surf_linear128_l, + p->surf_linear128_c, + p->DPPPerSurface, + + // Output + p->req_per_swath_ub_l, + p->req_per_swath_ub_c, + l->SwathWidthSingleDPP, + l->SwathWidthSingleDPPChroma, + p->SwathWidth, + p->SwathWidthChroma, + l->MaximumSwathHeightY, + l->MaximumSwathHeightC, + p->swath_width_luma_ub, + p->swath_width_chroma_ub); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * l->MaximumSwathHeightY[k]); + p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * l->MaximumSwathHeightC[k]); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); + dml2_printf("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); + dml2_printf("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); + dml2_printf("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, l->MaximumSwathHeightY[k]); + dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + dml2_printf("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); + dml2_printf("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); + dml2_printf("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, l->MaximumSwathHeightC[k]); + dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); +#endif + if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) { + p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256)); + p->full_swath_bytes_c[k] = 
(unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256)); + } + } + + unsigned int TotalActiveDPP = 0; + bool NoChromaOrLinear = true; + unsigned int SurfaceDoingUnboundedRequest = 0; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]); + if (p->DPPPerSurface[k] > 0) + SurfaceDoingUnboundedRequest = k; + if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha + || p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { + NoChromaOrLinear = false; + } + l->SwathTimeValueUs[k] = (unsigned int) ((double)l->MaximumSwathHeightY[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total + / p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000); + } + + *p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear); + + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.display_cfg = p->display_cfg; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.ForceSingleDPP = p->ForceSingleDPP; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.NumberOfActiveSurfaces = p->NumberOfActiveSurfaces; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.UnboundedRequestEnabled = *p->UnboundedRequestEnabled; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.nomDETInKByte = p->nomDETInKByte; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.MaxTotalDETInKByte = p->MaxTotalDETInKByte; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.ConfigReturnBufferSizeInKByte = p->ConfigReturnBufferSizeInKByte; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.MinCompressedBufferSizeInKByte = p->MinCompressedBufferSizeInKByte; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.ConfigReturnBufferSegmentSizeInkByte = p->ConfigReturnBufferSegmentSizeInkByte; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.CompressedBufferSegmentSizeInkByte = p->CompressedBufferSegmentSizeInkByte; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.ReadBandwidthLuma = p->ReadBandwidthLuma; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.ReadBandwidthChroma = p->ReadBandwidthChroma; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.full_swath_bytes_l = p->full_swath_bytes_l; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.full_swath_bytes_c = p->full_swath_bytes_c; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.DPPPerSurface = p->DPPPerSurface; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.DETBufferSizeInKByte = p->DETBufferSizeInKByte; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.CompressedBufferSizeInkByte = p->CompressedBufferSizeInkByte; + 
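// Illustrative example (timing values assumed, not from the patch): with h_total = 2200 and
// pixel_clock = 148500 kHz (a 1080p60-like timing), a 16-line luma swath gives
//   swath_time_value_us = 16 * 2200 / 148500 * 1000 ~= 237 us,
// i.e. the scan-out time of one full swath, which is handed to the DET buffer sizing call below.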
scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.swath_time_value_us = l->SwathTimeValueUs; + scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params.bestEffortMinActiveLatencyHidingUs = p->display_cfg->overrides.best_effort_min_active_latency_hiding_us; + if (p->funcs->calculate_det_buffer_size) { + p->funcs->calculate_det_buffer_size(&scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params); + } else { + CalculateDETBufferSize(&scratch->CalculateSwathAndDETConfiguration_locals.calculate_det_buffer_size_params); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); + dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); + dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); + dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); + dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); +#endif + + unsigned int DETBufferSizeInKByteForSwathCalculation; + *p->ViewportSizeSupport = true; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + + DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); +#endif + + if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = l->MaximumSwathHeightY[k]; + p->SwathHeightC[k] = l->MaximumSwathHeightC[k]; + l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; + l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; + p->request_size_bytes_luma[k] = 256; + p->request_size_bytes_chroma[k] = 256; + + } else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = l->MaximumSwathHeightY[k] / 2; + p->SwathHeightC[k] = l->MaximumSwathHeightC[k]; + l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; + l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k]; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = 256; + + } else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + p->SwathHeightY[k] = l->MaximumSwathHeightY[k]; + p->SwathHeightC[k] = l->MaximumSwathHeightC[k] / 2; + l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k]; + l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; + p->request_size_bytes_luma[k] = 256; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 
128 : 64; + + } else { + p->SwathHeightY[k] = l->MaximumSwathHeightY[k] / 2; + p->SwathHeightC[k] = l->MaximumSwathHeightC[k] / 2; + l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; + l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + } + + if (p->SwathHeightC[k] == 0) + p->request_size_bytes_chroma[k] = 0; + + if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || + p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) { + *p->ViewportSizeSupport = false; + p->ViewportSizeSupportPerSurface[k] = false; + } else { + p->ViewportSizeSupportPerSurface[k] = true; + } + + if (p->SwathHeightC[k] == 0) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); +#endif + p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024; + p->DETBufferSizeC[k] = 0; + } else if (l->RoundedUpSwathSizeBytesY[k] <= 1.5 * l->RoundedUpSwathSizeBytesC[k]) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); +#endif + p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; + p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; + } else { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k); +#endif + p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024)); + p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + dml2_printf("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); + dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); + dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, l->RoundedUpSwathSizeBytesY[k]); + dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, l->RoundedUpSwathSizeBytesC[k]); + dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); + dml2_printf("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + dml2_printf("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); + dml2_printf("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); +#endif + + } + + const long TTUFIFODEPTH = 8; + const long MAXIMUMCOMPRESSION = 4; + *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64; + if (*p->UnboundedRequestEnabled) { + *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, + (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(l->RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / 64)), 1.0); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", 
__func__, SurfaceDoingUnboundedRequest, l->RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); + dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); +#endif + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); +#endif + + *p->hw_debug5 = false; + if (!p->mrq_present) { + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!(*p->UnboundedRequestEnabled) + && p->display_cfg->plane_descriptors[k].surface.dcc.enable + && ((p->rob_buffer_size_kbytes * 1024 + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (l->RoundedUpSwathSizeBytesY[k] + l->RoundedUpSwathSizeBytesC[k]))) + *p->hw_debug5 = true; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); + dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); + dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); + dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); + dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, l->RoundedUpSwathSizeBytesC[k]); + dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); +#endif + } + } +} + +static void CalculateODMMode( + unsigned int MaximumPixelsPerLinePerDSCUnit, + unsigned int HActive, + enum dml2_output_format_class OutFormat, + enum dml2_output_encoder_class Output, + enum dml2_odm_mode ODMUse, + double MaxDispclk, + bool DSCEnable, + unsigned int TotalNumberOfActiveDPP, + unsigned int MaxNumDPP, + double PixelClock, + + // Output + bool *TotalAvailablePipesSupport, + unsigned int *NumberOfDPP, + enum dml2_odm_mode *ODMMode, + double *RequiredDISPCLKPerSurface) +{ + double SurfaceRequiredDISPCLKWithoutODMCombine; + double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + double SurfaceRequiredDISPCLKWithODMCombineThreeToOne; + double SurfaceRequiredDISPCLKWithODMCombineFourToOne; + + SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock); + SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock); + SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock); + SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock); + *TotalAvailablePipesSupport = true; + + if (OutFormat == dml2_420) { + if (HActive > 4 * DML2_MAX_FMT_420_BUFFER_WIDTH) + *TotalAvailablePipesSupport = false; + else if (HActive > 3 * DML2_MAX_FMT_420_BUFFER_WIDTH) + ODMUse = dml2_odm_mode_combine_4to1; + else if (HActive > 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) + ODMUse = dml2_odm_mode_combine_3to1; + else if (HActive > DML2_MAX_FMT_420_BUFFER_WIDTH) + ODMUse = dml2_odm_mode_combine_2to1; + if (Output == dml2_hdmi && ODMUse == dml2_odm_mode_combine_2to1) + *TotalAvailablePipesSupport = false; + if (Output == dml2_hdmi && ODMUse == dml2_odm_mode_combine_3to1) + *TotalAvailablePipesSupport = false; + if (Output == dml2_hdmi && ODMUse == dml2_odm_mode_combine_4to1) + *TotalAvailablePipesSupport = false; + } + + if (ODMUse == dml2_odm_mode_bypass || ODMUse == dml2_odm_mode_auto) + *ODMMode = dml2_odm_mode_bypass; + else if (ODMUse == dml2_odm_mode_combine_2to1) + *ODMMode = dml2_odm_mode_combine_2to1; + else if (ODMUse == 
dml2_odm_mode_combine_3to1) + *ODMMode = dml2_odm_mode_combine_3to1; + else if (ODMUse == dml2_odm_mode_combine_4to1) + *ODMMode = dml2_odm_mode_combine_4to1; + else if (ODMUse == dml2_odm_mode_split_1to2) + *ODMMode = dml2_odm_mode_split_1to2; + else if (ODMUse == dml2_odm_mode_mso_1to2) + *ODMMode = dml2_odm_mode_mso_1to2; + else if (ODMUse == dml2_odm_mode_mso_1to4) + *ODMMode = dml2_odm_mode_mso_1to4; + + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; + *NumberOfDPP = 0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: ODMUse = %d\n", __func__, ODMUse); + dml2_printf("DML::%s: Output = %d\n", __func__, Output); + dml2_printf("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); + dml2_printf("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); + dml2_printf("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); + dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); + dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); + dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); + dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); +#endif + + if (ODMUse == dml2_odm_mode_combine_4to1 || (ODMUse == dml2_odm_mode_auto && + (SurfaceRequiredDISPCLKWithODMCombineThreeToOne > MaxDispclk || (DSCEnable && (HActive > 3 * MaximumPixelsPerLinePerDSCUnit))))) { + if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { + *ODMMode = dml2_odm_mode_combine_4to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; + *NumberOfDPP = 4; + } else { + *TotalAvailablePipesSupport = false; + } + } else if (ODMUse == dml2_odm_mode_combine_3to1 || (ODMUse == dml2_odm_mode_auto && + ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > MaxDispclk && SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) || + (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))))) { + if (TotalNumberOfActiveDPP + 3 <= MaxNumDPP) { + *ODMMode = dml2_odm_mode_combine_3to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineThreeToOne; + *NumberOfDPP = 3; + } else { + *TotalAvailablePipesSupport = false; + } + + } else if (ODMUse == dml2_odm_mode_combine_2to1 || (ODMUse == dml2_odm_mode_auto && + ((SurfaceRequiredDISPCLKWithoutODMCombine > MaxDispclk && SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) || + (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))))) { + if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { + *ODMMode = dml2_odm_mode_combine_2to1; + *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; + *NumberOfDPP = 2; + } else { + *TotalAvailablePipesSupport = false; + } + + } else { + if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) { + *NumberOfDPP = 1; + } else { + *TotalAvailablePipesSupport = false; + } + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: ODMMode = %d\n", __func__, *ODMMode); + dml2_printf("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); + dml2_printf("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); + dml2_printf("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); +#endif + +} + +static void CalculateOutputLink( + struct dml2_core_internal_scratch *s, + double PHYCLK, + double PHYCLKD18, 
+ double PHYCLKD32, + double Downspreading, + bool IsMainSurfaceUsingTheIndicatedTiming, + enum dml2_output_encoder_class Output, + enum dml2_output_format_class OutputFormat, + unsigned int HTotal, + unsigned int HActive, + double PixelClockBackEnd, + double ForcedOutputLinkBPP, + unsigned int DSCInputBitPerComponent, + unsigned int NumberOfDSCSlices, + double AudioSampleRate, + unsigned int AudioSampleLayout, + enum dml2_odm_mode ODMModeNoDSC, + enum dml2_odm_mode ODMModeDSC, + enum dml2_dsc_enable_option DSCEnable, + unsigned int OutputLinkDPLanes, + enum dml2_output_link_dp_rate OutputLinkDPRate, + + // Output + bool *RequiresDSC, + bool *RequiresFEC, + double *OutBpp, + enum dml2_core_internal_output_type *OutputType, + enum dml2_core_internal_output_type_rate *OutputRate, + unsigned int *RequiredSlots) +{ + bool LinkDSCEnable; + unsigned int dummy; + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = 0; + + *OutputType = dml2_core_internal_output_type_unknown; + *OutputRate = dml2_core_internal_output_rate_unknown; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); + dml2_printf("DML::%s: IsMainSurfaceUsingTheIndicatedTiming = %u\n", __func__, IsMainSurfaceUsingTheIndicatedTiming); + dml2_printf("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); + dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + dml2_printf("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate); + dml2_printf("DML::%s: HActive = %u\n", __func__, HActive); + dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); + dml2_printf("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); + dml2_printf("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); + dml2_printf("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); + dml2_printf("DML::%s: Output (encoder) = %u\n", __func__, Output); + dml2_printf("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); +#endif + if (IsMainSurfaceUsingTheIndicatedTiming) { + if (Output == dml2_hdmi) { + *RequiresDSC = false; + *RequiresFEC = false; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = "HDMI"; + *OutputType = dml2_core_internal_output_type_hdmi; + } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) { + if (DSCEnable == dml2_dsc_enable) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp || Output == dml2_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } else { + *RequiresDSC = false; + LinkDSCEnable = false; + if (Output == dml2_dp2p0) { + *RequiresFEC = true; + } else { + *RequiresFEC = false; + } + } + if (Output == dml2_dp2p0) { + *OutBpp = 0; + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLKD32 < 13500 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && 
ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR10"; + *OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLKD32 < 20000 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR13p5"; + *OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000 / 32) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " UHBR20"; + *OutputType = dml2_core_internal_output_type_dp2p0; + *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20; + } + } else { // output is dp or edp + *OutBpp = 0; + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { 
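// Enabling DSC on a DP 8b/10b link also requires FEC on that link (eDP does not),
// which is why the HBR/HBR2/HBR3 fallback paths set RequiresFEC together with RequiresDSC.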
+ *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR"; + *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR2"; + *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2; + } + if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + if (Output == dml2_dp) { + *RequiresFEC = true; + } + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, + OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); + } + //OutputTypeAndRate = Output & " HBR3"; + *OutputType = (Output == dml2_dp) ? 
dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp; + *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3; + } + } + } else if (Output == dml2_hdmifrl) { + if (DSCEnable == dml2_dsc_enable) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + } else { + *RequiresDSC = false; + LinkDSCEnable = false; + *RequiresFEC = false; + } + *OutBpp = 0; + if (PHYCLKD18 >= 3000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "3x3"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3; + } + if (*OutBpp == 0 && PHYCLKD18 >= 6000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "6x3"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3; + } + if (*OutBpp == 0 && PHYCLKD18 >= 6000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "6x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 8000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + //OutputTypeAndRate = Output & "8x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 10000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000 / 18) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + } + //OutputTypeAndRate = Output & "10x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4; + } + if (*OutBpp == 0 && PHYCLKD18 >= 12000 / 18) { + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, 
OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + *RequiresDSC = true; + LinkDSCEnable = true; + *RequiresFEC = true; + *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy); + } + //OutputTypeAndRate = Output & "12x4"; + *OutputType = dml2_core_internal_output_type_hdmifrl; + *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4; + } + } + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); + dml2_printf("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); + dml2_printf("DML::%s: OutBpp = %f\n", __func__, *OutBpp); +#endif +} + +static double CalculateWriteBackDISPCLK( + enum dml2_source_format_class WritebackPixelFormat, + double PixelClock, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackHTaps, + unsigned int WritebackVTaps, + unsigned int WritebackSourceWidth, + unsigned int WritebackDestinationWidth, + unsigned int HTotal, + unsigned int WritebackLineBufferSize) +{ + double DISPCLK_H, DISPCLK_V, DISPCLK_HB; + + DISPCLK_H = PixelClock * math_ceil2((double)WritebackHTaps / 8.0, 1) / WritebackHRatio; + DISPCLK_V = PixelClock * (WritebackVTaps * math_ceil2((double)WritebackDestinationWidth / 6.0, 1) + 8.0) / (double)HTotal; + DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (double)WritebackSourceWidth; + return math_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); +} + +static double RequiredDTBCLK( + bool DSCEnable, + double PixelClock, + enum dml2_output_format_class OutputFormat, + double OutputBpp, + unsigned int DSCSlices, + unsigned int HTotal, + unsigned int HActive, + unsigned int AudioRate, + unsigned int AudioLayout) +{ + if (DSCEnable != true) { + return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); + } else { + double PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2); + double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); + double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); + double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + double HActiveTribyteRate = PixelWordRate * HCActive / HActive; + return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; + } +} + +static unsigned int DSCDelayRequirement( + bool DSCEnabled, + enum dml2_odm_mode ODMMode, + unsigned int DSCInputBitPerComponent, + double OutputBpp, + unsigned int HActive, + unsigned int HTotal, + unsigned int NumberOfDSCSlices, + enum dml2_output_format_class OutputFormat, + enum dml2_output_encoder_class Output, + double PixelClock, + double PixelClockBackEnd) +{ + unsigned int DSCDelayRequirement_val = 0; + unsigned int NumberOfDSCSlicesFactor = 1; + + if (DSCEnabled == true && OutputBpp != 0) { + + if (ODMMode == dml2_odm_mode_combine_4to1) + NumberOfDSCSlicesFactor = 4; + else if (ODMMode == dml2_odm_mode_combine_3to1) + NumberOfDSCSlicesFactor = 3; + else if (ODMMode == dml2_odm_mode_combine_2to1) + NumberOfDSCSlicesFactor = 2; + + DSCDelayRequirement_val = NumberOfDSCSlicesFactor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0)), + (NumberOfDSCSlices / NumberOfDSCSlicesFactor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output)); + + DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val + (HTotal - HActive) * math_ceil2((double)DSCDelayRequirement_val / (double)HActive, 1.0)); + DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd); + + } else { + DSCDelayRequirement_val = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); + dml2_printf("DML::%s: ODMMode = %u\n", __func__, ODMMode); + dml2_printf("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); + dml2_printf("DML::%s: HActive = %u\n", __func__, HActive); + dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); + dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock); + dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + dml2_printf("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); + dml2_printf("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); + dml2_printf("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); + dml2_printf("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); +#endif + + return DSCDelayRequirement_val; +} + +static void CalculateSurfaceSizeInMall( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int MALLAllocatedForDCN, + unsigned int BytesPerPixelY[], + unsigned int BytesPerPixelC[], + unsigned int Read256BytesBlockWidthY[], + unsigned int Read256BytesBlockWidthC[], + unsigned int Read256BytesBlockHeightY[], + unsigned int Read256BytesBlockHeightC[], + unsigned int ReadBlockWidthY[], + unsigned int ReadBlockWidthC[], + unsigned int ReadBlockHeightY[], + unsigned int ReadBlockHeightC[], + + // Output + unsigned int SurfaceSizeInMALL[], + bool *ExceededMALLSize) +{ + unsigned int TotalSurfaceSizeInMALLForSS = 0; + unsigned int TotalSurfaceSizeInMALLForSubVP = 0; + unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition; + const struct dml2_surface_cfg *surface = 
&display_cfg->plane_descriptors[k].surface; + + if (composition->viewport.stationary) { + SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]), + math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - + math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) * + math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]), + math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - + math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]); + + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]), + math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - + math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) * + math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]), + math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - + math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]); + } + if (surface->dcc.enable) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_min2(math_ceil2(surface->plane0.width, 8 * Read256BytesBlockWidthY[k]), + math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + 8 * Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) - + math_floor2(composition->viewport.plane0.x_start, 8 * Read256BytesBlockWidthY[k])) * + math_min2(math_ceil2(surface->plane0.height, 8 * Read256BytesBlockHeightY[k]), + math_floor2(composition->viewport.plane0.y_start + composition->viewport.plane0.height + 8 * Read256BytesBlockHeightY[k] - 1, 8 * Read256BytesBlockHeightY[k]) - + math_floor2(composition->viewport.plane0.y_start, 8 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024); + if (Read256BytesBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_min2(math_ceil2(surface->plane1.width, 8 * Read256BytesBlockWidthC[k]), + math_floor2(composition->viewport.plane1.y_start + composition->viewport.plane1.width + 8 * Read256BytesBlockWidthC[k] - 1, 8 * Read256BytesBlockWidthC[k]) - + math_floor2(composition->viewport.plane1.y_start, 8 * Read256BytesBlockWidthC[k])) * + math_min2(math_ceil2(surface->plane1.height, 8 * Read256BytesBlockHeightC[k]), + math_floor2(composition->viewport.plane1.y_start + composition->viewport.plane1.height + 8 * Read256BytesBlockHeightC[k] - 1, 8 * Read256BytesBlockHeightC[k]) - + math_floor2(composition->viewport.plane1.y_start, 8 * Read256BytesBlockHeightC[k])) * BytesPerPixelC[k] / 256); + } + } + } else { + SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * + math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]); + if (ReadBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_ceil2(math_min2(surface->plane1.width, 
composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * + math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]); + } + if (surface->dcc.enable) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + 8 * Read256BytesBlockWidthY[k] - 1), 8 * Read256BytesBlockWidthY[k]) * + math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + 8 * Read256BytesBlockHeightY[k] - 1), 8 * Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024); + + if (Read256BytesBlockWidthC[k] > 0) { + SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] + + math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + 8 * Read256BytesBlockWidthC[k] - 1), 8 * Read256BytesBlockWidthC[k]) * + math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + 8 * Read256BytesBlockHeightC[k] - 1), 8 * Read256BytesBlockHeightC[k]) * BytesPerPixelC[k] / 256); + } + } + } + } + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + /* SS and Subvp counted separate as they are never used at the same time */ + if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) + TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k]; + else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable) + TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k]; + } + + *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) || + (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); + dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); + dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); + dml2_printf("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); +#endif +} + +static void calculate_tdlut_setting( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_calculate_tdlut_setting_params *p) +{ + if (!p->setup_for_tdlut) { + *p->tdlut_groups_per_2row_ub = 0; + *p->tdlut_opt_time = 0; + *p->tdlut_drain_time = 0; + *p->tdlut_bytes_per_group = 0; + *p->tdlut_pte_bytes_per_frame = 0; + *p->tdlut_bytes_per_frame = 0; + return; + } + + // locals + unsigned int tdlut_bpe = 8; + unsigned int tdlut_width; + unsigned int tdlut_pitch_bytes; + unsigned int tdlut_footprint_bytes; + unsigned int vmpg_bytes; + unsigned int tdlut_vmpg_per_frame; + unsigned int tdlut_pte_req_per_frame; + unsigned int tdlut_bytes_per_line; + unsigned int tdlut_delivery_cycles; + double tdlut_drain_rate; + unsigned int tdlut_mpc_width; + unsigned int tdlut_bytes_per_group_simple; + + if (p->tdlut_mpc_width_flag) { + tdlut_mpc_width = 33; + tdlut_bytes_per_group_simple = 39 * 256; + } else { + tdlut_mpc_width = 17; + tdlut_bytes_per_group_simple = 10 * 256; + } + + vmpg_bytes = p->gpuvm_page_size_kbytes * 1024; + + if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) { + if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) + tdlut_width = 4916; + else + tdlut_width = 35940; + } else { + if (p->tdlut_width_mode == dml2_tdlut_width_17_cube) + tdlut_width = 17; + else // dml2_tdlut_width_33_cube + tdlut_width = 33; + } + + if (p->is_gfx11) 
+ tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment + else + tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment + + if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) + tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width; + else + tdlut_footprint_bytes = tdlut_pitch_bytes; + + if (!p->gpuvm_enable) { + tdlut_vmpg_per_frame = 0; + tdlut_pte_req_per_frame = 0; + } else { + tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1; + tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1; + } + tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request + *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64; + + if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) { + //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice + *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width; + *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; + //the delivery cycles is DispClk cycles per line * number of lines * number of slices + tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width / 2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / 9.0; + } else { + //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements + *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); + *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple; + tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width / 2.0, 1); + tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz; + } + + //the tdlut is fetched during the 2 row times of prefetch. 
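	/*
	 * The split below partitions the total LUT fetch time
	 * (tdlut_bytes_per_frame / tdlut_drain_rate) into tdlut_opt_time and
	 * tdlut_drain_time, where the drain phase covers the last
	 * cursor_buffer_size KiB of the LUT:
	 *
	 *   tdlut_drain_time = cursor_buffer_size * 1024 / tdlut_drain_rate
	 *   tdlut_opt_time   = (tdlut_bytes_per_frame - cursor_buffer_size * 1024) / tdlut_drain_rate
	 *
	 * tdlut_groups_per_2row_ub is simply
	 * ceil(tdlut_bytes_per_frame / tdlut_bytes_per_group).
	 */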
+ if (p->setup_for_tdlut) { + *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2(*p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); + *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); + dml2_printf("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); + dml2_printf("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); + dml2_printf("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); + dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); + dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); + dml2_printf("DML::%s: tdlut_addressing_mode = %u\n", __func__, p->tdlut_addressing_mode); + dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); + dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); + dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); + dml2_printf("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); + dml2_printf("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); + dml2_printf("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); + dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles); + dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); + dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); + dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); +#endif +} + +static void CalculateTarb( + const struct dml2_display_cfg *display_cfg, + unsigned int PixelChunkSizeInKByte, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + unsigned int tdlut_bytes_per_group[], + double HostVMInefficiencyFactor, + double HostVMInefficiencyFactorPrefetch, + unsigned int HostVMMinPageSize, + double ReturnBW, + unsigned int MetaChunkSize, + + // output + double *Tarb, + double *Tarb_prefetch) +{ + double extra_bytes = 0; + double extra_bytes_prefetch = 0; + double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_page_table_levels); + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024); + + if (display_cfg->plane_descriptors[k].surface.dcc.enable) + extra_bytes = extra_bytes + (MetaChunkSize * 1024); + + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + extra_bytes = extra_bytes + tdlut_bytes_per_group[k]; + } + + extra_bytes_prefetch = extra_bytes; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (display_cfg->gpuvm_enable == true) { + extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; + extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch; + } + } + *Tarb = extra_bytes / ReturnBW; + *Tarb_prefetch = extra_bytes_prefetch / ReturnBW; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); + dml2_printf("DML::%s: 
MetaChunkSize = %d\n", __func__, MetaChunkSize); + dml2_printf("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); + dml2_printf("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch); +#endif +} + +static double CalculateTWait( + long reserved_vblank_time_ns, + double UrgentLatency, + double Ttrip) +{ + double TWait; + double t_urg_trip = math_max2(UrgentLatency, Ttrip); + TWait = reserved_vblank_time_ns / 1000.0 + t_urg_trip; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: reserved_vblank_time_ns = %d\n", __func__, reserved_vblank_time_ns); + dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + dml2_printf("DML::%s: Ttrip = %f\n", __func__, Ttrip); + dml2_printf("DML::%s: TWait = %f\n", __func__, TWait); +#endif + return TWait; +} + + +static void CalculateVUpdateAndDynamicMetadataParameters( + unsigned int MaxInterDCNTileRepeaters, + double Dppclk, + double Dispclk, + double DCFClkDeepSleep, + double PixelClock, + unsigned int HTotal, + unsigned int VBlank, + unsigned int DynamicMetadataTransmittedBytes, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + + // Output + double *TSetup, + double *Tdmbf, + double *Tdmec, + double *Tdmsks, + unsigned int *VUpdateOffsetPix, + unsigned int *VUpdateWidthPix, + unsigned int *VReadyOffsetPix) +{ + double TotalRepeaterDelayTime; + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); + *VUpdateWidthPix = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0)); + *VReadyOffsetPix = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0)); + *VUpdateOffsetPix = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0)); + *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; + *Tdmec = HTotal / PixelClock; + + if (DynamicMetadataLinesBeforeActiveRequired == 0) { + *Tdmsks = VBlank * HTotal / PixelClock / 2.0; + } else { + *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; + } + if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { + *Tdmsks = *Tdmsks / 2; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); + dml2_printf("DML::%s: VBlank = %u\n", __func__, VBlank); + dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); + dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock); + dml2_printf("DML::%s: Dppclk = %f\n", __func__, Dppclk); + dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); + dml2_printf("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); + dml2_printf("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); + + dml2_printf("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); + dml2_printf("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); + dml2_printf("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); + + dml2_printf("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); +#endif +} + +static double get_urgent_bandwidth_required( + struct dml2_core_shared_get_urgent_bandwidth_required_locals *l, + const struct dml2_display_cfg *display_cfg, + enum dml2_core_internal_soc_state_type state_type, + enum dml2_core_internal_bw_type 
bw_type, + bool inc_flip_bw, // including flip bw + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double dcc_dram_bw_pref_overhead_factor_p0[], + double dcc_dram_bw_pref_overhead_factor_p1[], + double mall_prefetch_sdp_overhead_factor[], + double mall_prefetch_dram_overhead_factor[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double dpte_row_bw[], + double meta_row_bw[], + double prefetch_cursor_bw[], + double prefetch_vmrow_bw[], + double flip_bw[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[]) +{ + memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals)); + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0; + l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor; + l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor; + l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor; + l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? 
dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor; + + l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0; + l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1; + l->adj_factor_cur = UrgentBurstFactorCursor[k]; + l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0; + l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1; + l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k]; + + // both dchub_urgent_bw_at_sdp_noflip and dchub_urgent_bw_at_dram_noflip don't include the phantom_pipe because iflips dont occur while phantom_pipe is active + bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]); + bool exclude_this_plane = 0; + + // Exclude phantom pipe in bw calculation for non svp prefetch state + if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom) + exclude_this_plane = 1; + + if (display_cfg->plane_descriptors[k].immediate_flip == false || !inc_flip_bw) + l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]); + else + l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k]; + + + if (!exclude_this_plane) { + l->required_bandwidth_mbps_this_surface = math_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], + l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur, + l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre); + + l->required_bandwidth_mbps = l->required_bandwidth_mbps + l->required_bandwidth_mbps_this_surface; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); + dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); + dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); + dml2_printf("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); + + dml2_printf("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); + dml2_printf("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); + dml2_printf("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre); + + dml2_printf("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]); + dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); + dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); + dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); + + dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); + dml2_printf("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]); + dml2_printf("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]); + dml2_printf("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]); + dml2_printf("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]); + dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, 
l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); +#endif + } + + return l->required_bandwidth_mbps; +} + +static void CalculateExtraLatency( + const struct dml2_display_cfg *display_cfg, + unsigned int ROBBufferSizeInKByte, + unsigned int RoundTripPingLatencyCycles, + unsigned int ReorderingBytes, + double DCFCLK, + double FabricClock, + unsigned int PixelChunkSizeInKByte, + double ReturnBW, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + unsigned int dpte_group_bytes[], + unsigned int tdlut_bytes_per_group[], + double HostVMInefficiencyFactor, + double HostVMInefficiencyFactorPrefetch, + unsigned int HostVMMinPageSize, + enum dml2_qos_param_type qos_type, + bool max_oustanding_when_urgent_expected, + unsigned int max_outstanding_requests, + unsigned int request_size_bytes_luma[], + unsigned int request_size_bytes_chroma[], + unsigned int MetaChunkSize, + unsigned int dchub_arb_to_ret_delay, + double Ttrip, + unsigned int hostvm_mode, + + // output + double *ExtraLatency, + double *ExtraLatency_sr, + double *ExtraLatencyPrefetch) +{ + double Tarb; + double Tarb_prefetch; + + CalculateTarb( + display_cfg, + PixelChunkSizeInKByte, + NumberOfActiveSurfaces, + NumberOfDPP, + dpte_group_bytes, + tdlut_bytes_per_group, + HostVMInefficiencyFactor, + HostVMInefficiencyFactorPrefetch, + HostVMMinPageSize, + ReturnBW, + MetaChunkSize, + // output + &Tarb, + &Tarb_prefetch); + + unsigned int max_request_size_bytes = 0; + double Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? (2.0 * Ttrip) : 0.0; + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + if (request_size_bytes_luma[k] > max_request_size_bytes) + max_request_size_bytes = request_size_bytes_luma[k]; + if (request_size_bytes_chroma[k] > max_request_size_bytes) + max_request_size_bytes = request_size_bytes_chroma[k]; + } + + if (qos_type == dml2_qos_param_type_dcn4) { + *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; + *ExtraLatency = *ExtraLatency_sr; + if (max_oustanding_when_urgent_expected) + *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW; + } else { + *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW; + *ExtraLatency = *ExtraLatency_sr; + } + *ExtraLatency = *ExtraLatency + Tex_trips; + *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch; + *ExtraLatency = *ExtraLatency + Tarb; + *ExtraLatency_sr = *ExtraLatency_sr + Tarb; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); + dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected); + dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); + dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); + dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); + dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); + dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); + dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); + dml2_printf("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); +#endif +} + +static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p) +{ + struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = 
&scratch->CalculatePrefetchSchedule_locals; + + s->NoTimeToPrefetch = false; + s->DPPCycles = 0; + s->DISPCLKCycles = 0; + s->DSTTotalPixelsAfterScaler = 0.0; + s->LineTime = 0.0; + s->dst_y_prefetch_equ = 0.0; + s->prefetch_bw_oto = 0.0; + s->Tvm_oto = 0.0; + s->Tr0_oto = 0.0; + s->Tvm_oto_lines = 0.0; + s->Tr0_oto_lines = 0.0; + s->dst_y_prefetch_oto = 0.0; + s->TimeForFetchingVM = 0.0; + s->TimeForFetchingRowInVBlank = 0.0; + s->LinesToRequestPrefetchPixelData = 0.0; + s->HostVMDynamicLevelsTrips = 0; + s->trip_to_mem = 0.0; + *p->Tvm_trips = 0.0; + *p->Tr0_trips = 0.0; + s->Tvm_trips_rounded = 0.0; + s->Tr0_trips_rounded = 0.0; + s->max_Tsw = 0.0; + s->Lsw_oto = 0.0; + s->Tpre_rounded = 0.0; + s->prefetch_bw_equ = 0.0; + s->Tvm_equ = 0.0; + s->Tr0_equ = 0.0; + s->Tdmbf = 0.0; + s->Tdmec = 0.0; + s->Tdmsks = 0.0; + s->prefetch_sw_bytes = 0.0; + s->prefetch_bw_pr = 0.0; + s->bytes_pp = 0.0; + s->dep_bytes = 0.0; + s->min_Lsw_oto = 0.0; + s->Tsw_est1 = 0.0; + s->Tsw_est3 = 0.0; + s->cursor_prefetch_bytes = 0; + *p->prefetch_cursor_bw = 0; + bool dcc_mrq_enable = (p->dcc_enable && p->mrq_present); + + s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) + + if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) { + s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_page_table_levels; + } else { + s->HostVMDynamicLevelsTrips = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); + dml2_printf("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); + dml2_printf("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); + dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); + dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); + dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup); + dml2_printf("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup); + dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); + dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); + dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + dml2_printf("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); + dml2_printf("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); +#endif + CalculateVUpdateAndDynamicMetadataParameters( + p->MaxInterDCNTileRepeaters, + p->myPipe->Dppclk, + p->myPipe->Dispclk, + p->myPipe->DCFClkDeepSleep, + p->myPipe->PixelClock, + p->myPipe->HTotal, + p->myPipe->VBlank, + p->DynamicMetadataTransmittedBytes, + p->DynamicMetadataLinesBeforeActiveRequired, + p->myPipe->InterlaceEnable, + p->myPipe->ProgressiveToInterlaceUnitInOPP, + p->TSetup, + + // Output + &s->Tdmbf, + &s->Tdmec, + &s->Tdmsks, + p->VUpdateOffsetPix, + p->VUpdateWidthPix, + p->VReadyOffsetPix); + + s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; + s->trip_to_mem = p->Ttrip; + *p->Tvm_trips = p->ExtraLatencyPrefetch + s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)); + if (dcc_mrq_enable) + *p->Tvm_trips_flip = *p->Tvm_trips; + else + *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; + *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); + 
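	/*
	 * Trip-time accounting: Tvm_trips is the prefetch extra latency plus one
	 * trip_to_mem round trip per GPUVM page table level, each level scaled by
	 * (HostVMDynamicLevelsTrips + 1) when host VM paging is enabled; the
	 * _flip variant drops one round trip unless DCC meta (mrq) must also be
	 * fetched. Tr0_trips_flip charges one such round trip for the row fetch,
	 * and Tr0_trips (next statement) is additionally bounded below by
	 * tdlut_opt_time / 2 so that the two row periods of the prefetch together
	 * cover the 3D LUT fetch.
	 */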
*p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2); + + if (p->DynamicMetadataVMEnabled == true) { + *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; + *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; + } else { + *p->Tdmdl_vm = 0; + *p->Tdmdl = p->TWait + p->ExtraLatencyPrefetch; // Tex + } + + if (p->DynamicMetadataEnable == true) { + if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { + *p->NotEnoughTimeForDynamicMetadata = true; + dml2_printf("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); + dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + } else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + } else { + *p->NotEnoughTimeForDynamicMetadata = false; + } + + if (p->myPipe->ScalerEnabled) + s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL); + else + s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly); + + s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor); + + s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal; + + if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0) + return true; + + *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay); + *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH + + ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) + + ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? 
(double)p->myPipe->HActive * 3.0 / 4.0 : 0)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); + dml2_printf("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); + dml2_printf("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); + dml2_printf("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); + dml2_printf("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); + dml2_printf("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); + dml2_printf("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); + dml2_printf("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); + dml2_printf("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); + dml2_printf("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); + + dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); + dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); + dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); +#endif + + if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) + *p->DSTYAfterScaler = 1; + else + *p->DSTYAfterScaler = 0; + + s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler; + *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1)); + *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal))); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); + dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); +#endif + + s->NoTimeToPrefetch = false; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + dml2_printf("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips); +#endif + if (p->display_cfg->gpuvm_enable) { + s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; + } else { + s->Tvm_trips_rounded = s->LineTime / 4.0; + *p->Tvm_trips_flip_rounded = s->LineTime / 4.0; + } + s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0); + *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0); + + if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) { + s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; + *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; + } else { + s->Tr0_trips_rounded = s->LineTime / 4.0; + *p->Tr0_trips_flip_rounded = s->LineTime / 4.0; + } + s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0); + *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0); + + *p->Tno_bw_flip = 0; + if (p->display_cfg->gpuvm_enable == true) { + 
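	/*
	 * Tno_bw approximates the leading portion of the prefetch during which no
	 * surface data bandwidth is demanded: with three or more GPUVM page table
	 * levels it is the prefetch extra latency plus round trips for all but
	 * the last two levels (again scaled by HostVMDynamicLevelsTrips + 1);
	 * with a single level and neither DCC meta nor a 3D LUT to fetch it is
	 * just the extra latency; otherwise it is zero.
	 */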
if (p->display_cfg->gpuvm_max_page_table_levels >= 3) { + *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1)); + } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) { + *p->Tno_bw = p->ExtraLatencyPrefetch; + } else { + *p->Tno_bw = 0; + } + *p->Tno_bw_flip = *p->Tno_bw; + } else { + *p->Tno_bw = 0; + } + + if (dml2_core_shared_is_420(p->myPipe->SourcePixelFormat)) { + s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0; + } else { + s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC; + } + + s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (double)p->myPipe->DPPPerSurface; + if (p->myPipe->VRatio < 1.0) + s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr; + s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); + + s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; + + s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; + s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; + s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); + + s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; + s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); + s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); + + unsigned int vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; + unsigned int extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); + + if (p->setup_for_tdlut) + vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? 
extra_tdpe_bytes : 0); + + unsigned long tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0); + s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto, + p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, + (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); + s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; + + if (p->display_cfg->gpuvm_enable == true) { + s->Tvm_oto = math_max3( + *p->Tvm_trips, + *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto, + s->LineTime / 4.0); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); + dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); + dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4); +#endif + + } else + s->Tvm_oto = s->LineTime / 4.0; + + if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { + s->Tr0_oto = math_max3( + *p->Tr0_trips, + (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, + s->LineTime / 4.0); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); + dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); + dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); +#endif + } else + s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 4.0; + + s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0; + s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0; + s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; + + //To (time for delay after scaler) in line time + unsigned int Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); + + //Tpre_equ in line time + s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(s->TWait_p + p->TCalc, *p->Tdmdl - p->Ttrip)) / s->LineTime - Lo; + s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); + dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); + dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); + dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); + dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); + dml2_printf("DML::%s: 
prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes); + dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); + dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); + dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); + dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); + dml2_printf("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); + dml2_printf("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); + dml2_printf("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); + dml2_printf("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); + dml2_printf("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); + dml2_printf("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); + dml2_printf("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); + dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + dml2_printf("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); + dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); +#endif + + s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; + s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; + + dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime); + dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup); + dml2_printf("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); + dml2_printf("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); + dml2_printf("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); + dml2_printf("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); + dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + dml2_printf("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); + dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + dml2_printf("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p); + dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); + dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); + dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); + + s->dep_bytes = math_max2(vm_bytes * p->HostVMInefficiencyFactor, p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes 
+ tdlut_row_bytes); + + dml2_printf("DML::%s: dep_bytes: %f\n", __func__, s->dep_bytes); + dml2_printf("DML::%s: prefetch_sw_bytes: %f\n", __func__, s->prefetch_sw_bytes); + dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes * p->HostVMInefficiencyFactor); + dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); + + if (s->prefetch_sw_bytes < s->dep_bytes) { + s->prefetch_sw_bytes = 2 * s->dep_bytes; + dml2_printf("DML::%s: bump prefetch_sw_bytes to %f\n", __func__, s->prefetch_sw_bytes); + } + + *p->dst_y_per_vm_vblank = 0; + *p->dst_y_per_row_vblank = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + + if (s->dst_y_prefetch_equ > 1) { + s->prefetch_bw1 = 0.; + s->prefetch_bw2 = 0.; + s->prefetch_bw3 = 0.; + s->prefetch_bw4 = 0.; + + if (s->Tpre_rounded - *p->Tno_bw > 0) { + s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + + s->prefetch_sw_bytes) + / (s->Tpre_rounded - *p->Tno_bw); + s->Tsw_est1 = s->prefetch_sw_bytes / s->prefetch_bw1; + } else + s->prefetch_bw1 = 0; + + dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); + if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) { + s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / + (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); + dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); + } + + if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0) + s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / + (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded); + else + s->prefetch_bw2 = 0; + + if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) { + s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) / + (s->Tpre_rounded - s->Tvm_trips_rounded); + s->Tsw_est3 = s->prefetch_sw_bytes / s->prefetch_bw3; + } else + s->prefetch_bw3 = 0; + + + dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); + if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) { + s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); + dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); + } + + if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0) + s->prefetch_bw4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded); + else + s->prefetch_bw4 = 0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded); + dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + dml2_printf("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded); + dml2_printf("DML::%s: Tr0_trips_rounded: %f\n", __func__, 2 
* s->Tr0_trips_rounded); + dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); + dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); + dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); + dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); + dml2_printf("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3); + dml2_printf("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4); +#endif + + { + bool Case1OK = false; + bool Case2OK = false; + bool Case3OK = false; + + if (s->prefetch_bw1 > 0) { + if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1 >= s->Tvm_trips_rounded && + (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw1 >= s->Tr0_trips_rounded) { + Case1OK = true; + } + } + + if (s->prefetch_bw2 > 0) { + if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2 >= s->Tvm_trips_rounded && + (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw2 < s->Tr0_trips_rounded) { + Case2OK = true; + } + } + + if (s->prefetch_bw3 > 0) { + if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3 < s->Tvm_trips_rounded && + (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw3 >= s->Tr0_trips_rounded) { + Case3OK = true; + } + } + + if (Case1OK) { + s->prefetch_bw_equ = s->prefetch_bw1; + } else if (Case2OK) { + s->prefetch_bw_equ = s->prefetch_bw2; + } else if (Case3OK) { + s->prefetch_bw_equ = s->prefetch_bw3; + } else { + s->prefetch_bw_equ = s->prefetch_bw4; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK); + dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK); + dml2_printf("DML::%s: Case3OK: %u\n", __func__, Case3OK); + dml2_printf("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ); +#endif + s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ, + p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, + (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); + + if (s->prefetch_bw_equ > 0) { + if (p->display_cfg->gpuvm_enable == true) { + s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4); + } else { + s->Tvm_equ = s->LineTime / 4; + } + + if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) { + s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes + *p->Tr0_trips, + s->LineTime / 4); + } else { + s->Tr0_equ = s->LineTime / 4; + } + } else { + s->Tvm_equ = 0; + s->Tr0_equ = 0; + dml2_printf("DML::%s: prefetch_bw_equ equals 0!\n", __func__); + } + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); + dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); +#endif + + if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { + *p->dst_y_prefetch = s->dst_y_prefetch_oto; + s->TimeForFetchingVM = s->Tvm_oto; + s->TimeForFetchingRowInVBlank = s->Tr0_oto; + + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; +#ifdef __DML_VBA_DEBUG__ 
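		/*
		 * This branch is taken when the bandwidth-derived ("oto") schedule
		 * needs fewer prefetch lines than the VStartup budget ("equ") allows;
		 * otherwise the equ schedule in the else branch is used, rounding the
		 * per-vm and per-row line counts down (when VStartup == MaxVStartup)
		 * or up to the 0.25-line granularity of the U6.2 DST_Y_* registers.
		 */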
+ dml2_printf("DML::%s: Using oto bw scheduling for prefetch\n", __func__); +#endif + + } else { + *p->dst_y_prefetch = s->dst_y_prefetch_equ; + s->TimeForFetchingVM = s->Tvm_equ; + s->TimeForFetchingRowInVBlank = s->Tr0_equ; + + if (p->VStartup == p->MaxVStartup) { + *p->dst_y_per_vm_vblank = math_floor2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_floor2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + } else { + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); +#endif + } + dml2_assert(*p->dst_y_prefetch < 64); + + // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) + s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw + + s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line); + *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); + dml2_printf("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); + dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); + dml2_printf("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); + dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); + dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + + dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); + dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); + dml2_printf("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); + dml2_printf("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); +#endif + unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime); + + if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) { + *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + dml2_printf("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); + dml2_printf("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); +#endif + if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) { + *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, + (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); + } else { + s->NoTimeToPrefetch = true; + dml2_printf("DML::%s: MyErr set. 
LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + *p->VRatioPrefetchY = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + dml2_printf("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); +#endif + } + + *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData; + *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + dml2_printf("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); + dml2_printf("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); +#endif + if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) { + if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) { + *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); + } else { + s->NoTimeToPrefetch = true; + dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + *p->VRatioPrefetchC = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + dml2_printf("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); +#endif + } + + *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); +#endif + *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime; + } else { + s->NoTimeToPrefetch = true; + dml2_printf("DML::%s: MyErr set, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + dml2_printf("DML::%s: MyErr set, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + *p->RequiredPrefetchPixelDataBWChroma = 0; + } + + dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); + dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); + dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); + dml2_printf("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime); + dml2_printf("DML: To: %fus - time for propogation from scaler to optc\n", (*p->DSTYAfterScaler + 
((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime); + dml2_printf("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n"); + dml2_printf("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); + dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); + + } else { + dml2_printf("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + s->NoTimeToPrefetch = true; + s->TimeForFetchingVM = 0; + s->TimeForFetchingRowInVBlank = 0; + *p->dst_y_per_vm_vblank = 0; + *p->dst_y_per_row_vblank = 0; + s->LinesToRequestPrefetchPixelData = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + *p->RequiredPrefetchPixelDataBWChroma = 0; + } + + { + double prefetch_vm_bw; + double prefetch_row_bw; + + if (vm_bytes == 0) { + prefetch_vm_bw = 0; + } else if (*p->dst_y_per_vm_vblank > 0) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); +#endif + prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); +#endif + } else { + prefetch_vm_bw = 0; + s->NoTimeToPrefetch = true; + dml2_printf("DML::%s: MyErr set. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); + } + + if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { + prefetch_row_bw = 0; + } else if (*p->dst_y_per_row_vblank > 0) { + prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); +#endif + } else { + prefetch_row_bw = 0; + s->NoTimeToPrefetch = true; + dml2_printf("DML::%s: MyErr set. 
dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + } + + *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); + } + + if (s->NoTimeToPrefetch) { + s->TimeForFetchingVM = 0; + s->TimeForFetchingRowInVBlank = 0; + *p->dst_y_per_vm_vblank = 0; + *p->dst_y_per_row_vblank = 0; + *p->dst_y_prefetch = 0; + s->LinesToRequestPrefetchPixelData = 0; + *p->VRatioPrefetchY = 0; + *p->VRatioPrefetchC = 0; + *p->RequiredPrefetchPixelDataBWLuma = 0; + *p->RequiredPrefetchPixelDataBWChroma = 0; + } + + dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); + dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + return s->NoTimeToPrefetch; +} + +static void calculate_peak_bandwidth_required( + struct dml2_core_internal_scratch *s, + + // output + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + + // input + const struct dml2_display_cfg *display_cfg, + unsigned int inc_flip_bw, + unsigned int NumberOfActiveSurfaces, + unsigned int NumberOfDPP[], + double dcc_dram_bw_nom_overhead_factor_p0[], + double dcc_dram_bw_nom_overhead_factor_p1[], + double dcc_dram_bw_pref_overhead_factor_p0[], + double dcc_dram_bw_pref_overhead_factor_p1[], + double mall_prefetch_sdp_overhead_factor[], + double mall_prefetch_dram_overhead_factor[], + double ReadBandwidthLuma[], + double ReadBandwidthChroma[], + double PrefetchBandwidthLuma[], + double PrefetchBandwidthChroma[], + double cursor_bw[], + double dpte_row_bw[], + double meta_row_bw[], + double prefetch_cursor_bw[], + double prefetch_vmrow_bw[], + double flip_bw[], + double UrgentBurstFactorLuma[], + double UrgentBurstFactorChroma[], + double UrgentBurstFactorCursor[], + double UrgentBurstFactorLumaPre[], + double UrgentBurstFactorChromaPre[], + double UrgentBurstFactorCursorPre[]) +{ + unsigned int n; + unsigned int m; + + struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals; + + memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: inc_flip_bw = %d\n", __func__, inc_flip_bw); + dml2_printf("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); +#endif + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + l->unity_array[k] = 1.0; + l->zero_array[k] = 0.0; + } + + for (m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (n = 0; n < dml2_core_internal_bw_max; n++) { + urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + display_cfg, + m, + n, + 0, //inc_flip_bw, + NumberOfActiveSurfaces, + NumberOfDPP, + dcc_dram_bw_nom_overhead_factor_p0, + dcc_dram_bw_nom_overhead_factor_p1, + dcc_dram_bw_pref_overhead_factor_p0, + dcc_dram_bw_pref_overhead_factor_p1, + mall_prefetch_sdp_overhead_factor, + mall_prefetch_dram_overhead_factor, + ReadBandwidthLuma, + ReadBandwidthChroma, + l->zero_array, //PrefetchBandwidthLuma, + l->zero_array, //PrefetchBandwidthChroma, + cursor_bw, + dpte_row_bw, + meta_row_bw, + l->zero_array, //prefetch_cursor_bw, + l->zero_array, //prefetch_vmrow_bw, + 
l->zero_array, //flip_bw, + UrgentBurstFactorLuma, + UrgentBurstFactorChroma, + UrgentBurstFactorCursor, + UrgentBurstFactorLumaPre, + UrgentBurstFactorChromaPre, + UrgentBurstFactorCursorPre); + + + urg_bandwidth_required[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + display_cfg, + m, + n, + inc_flip_bw, + NumberOfActiveSurfaces, + NumberOfDPP, + dcc_dram_bw_nom_overhead_factor_p0, + dcc_dram_bw_nom_overhead_factor_p1, + dcc_dram_bw_pref_overhead_factor_p0, + dcc_dram_bw_pref_overhead_factor_p1, + mall_prefetch_sdp_overhead_factor, + mall_prefetch_dram_overhead_factor, + ReadBandwidthLuma, + ReadBandwidthChroma, + PrefetchBandwidthLuma, + PrefetchBandwidthChroma, + cursor_bw, + dpte_row_bw, + meta_row_bw, + prefetch_cursor_bw, + prefetch_vmrow_bw, + flip_bw, + UrgentBurstFactorLuma, + UrgentBurstFactorChroma, + UrgentBurstFactorCursor, + UrgentBurstFactorLumaPre, + UrgentBurstFactorChromaPre, + UrgentBurstFactorCursorPre); + + non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required( + &s->get_urgent_bandwidth_required_locals, + display_cfg, + m, + n, + inc_flip_bw, + NumberOfActiveSurfaces, + NumberOfDPP, + dcc_dram_bw_nom_overhead_factor_p0, + dcc_dram_bw_nom_overhead_factor_p1, + dcc_dram_bw_pref_overhead_factor_p0, + dcc_dram_bw_pref_overhead_factor_p1, + mall_prefetch_sdp_overhead_factor, + mall_prefetch_dram_overhead_factor, + ReadBandwidthLuma, + ReadBandwidthChroma, + PrefetchBandwidthLuma, + PrefetchBandwidthChroma, + cursor_bw, + dpte_row_bw, + meta_row_bw, + prefetch_cursor_bw, + prefetch_vmrow_bw, + flip_bw, + l->unity_array, + l->unity_array, + l->unity_array, + l->unity_array, + l->unity_array, + l->unity_array); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_vactive_bandwidth_required[m][n]); + dml2_printf("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_required[m][n]); + dml2_printf("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (inc_flip_bw ? 
"_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), non_urg_bandwidth_required[m][n]); +#endif + dml2_assert(urg_bandwidth_required[m][n] >= non_urg_bandwidth_required[m][n]); + } + } +} + +static void check_urgent_bandwidth_support( + double *frac_urg_bandwidth_nom, + double *frac_urg_bandwidth_mall, + bool *vactive_bandwidth_support_ok, // vactive ok + bool *bandwidth_support_ok, // max of vm, prefetch, vactive all ok + + unsigned int mall_allocated_for_dcn_mbytes, + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + *bandwidth_support_ok = 1; + *vactive_bandwidth_support_ok = 1; + + double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + double frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + double frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + + // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth + // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram + // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL + // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch + + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + + if (mall_allocated_for_dcn_mbytes > 0) { + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + } + + *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram); + *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram); + + *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0); + + if (mall_allocated_for_dcn_mbytes > 0) + *bandwidth_support_ok &= 
(*frac_urg_bandwidth_mall <= 1.0); + + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + if (mall_allocated_for_dcn_mbytes > 0) { + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp); + dml2_printf("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); + dml2_printf("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); + + dml2_printf("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); + dml2_printf("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); + dml2_printf("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); + dml2_printf("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); +#endif + +} + +static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state, + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + double flip_bw_available_mbps; + double flip_bw_available_sdp_mbps; + double flip_bw_available_dram_mbps; + + flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]; + flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]; + flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? 
flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + dml2_printf("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); + dml2_printf("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); + dml2_printf("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); + dml2_printf("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); + dml2_printf("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); + dml2_printf("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); + dml2_printf("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); +#endif + + return flip_bw_available_mbps; +} + +static void calculate_immediate_flip_bandwidth_support( + // Output + double *frac_urg_bandwidth_flip, + bool *flip_bandwidth_support_ok, + + // Input + enum dml2_core_internal_soc_state_type eval_state, + double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]) +{ + double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]; + double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]; + + *flip_bandwidth_support_ok = true; + for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram + *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str((enum dml2_core_internal_bw_type) eval_state)); + dml2_printf("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); + dml2_printf("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); + dml2_printf("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); + dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); +#endif + dml2_assert(urg_bandwidth_required_flip[eval_state][n] > non_urg_bandwidth_required_flip[eval_state][n]); + } + + *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? 
frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram; + *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + dml2_printf("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); + dml2_printf("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); + dml2_printf("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); + dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); + + for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { + for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { + dml2_printf("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), + urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]); + } + } +#endif +} + +static void CalculateFlipSchedule( + struct dml2_core_internal_scratch *s, + bool iflip_enable, + bool use_lb_flip_bw, + double HostVMInefficiencyFactor, + double Tvm_trips_flip, + double Tr0_trips_flip, + double Tvm_trips_flip_rounded, + double Tr0_trips_flip_rounded, + bool GPUVMEnable, + double vm_bytes, // vm_bytes + double DPTEBytesPerRow, // dpte_row_bytes + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum dml2_source_format_class SourcePixelFormat, + double LineTime, + double VRatio, + double VRatioChroma, + double Tno_bw_flip, + unsigned int dpte_row_height, + unsigned int dpte_row_height_chroma, + bool use_one_row_for_frame_flip, + unsigned int max_flip_time_us, + unsigned int per_pipe_flip_bytes, + unsigned int meta_row_bytes, + unsigned int meta_row_height, + unsigned int meta_row_height_chroma, + bool dcc_mrq_enable, + + // Output + double *dst_y_per_vm_flip, + double *dst_y_per_row_flip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe) +{ + struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; + + l->dual_plane = dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; + l->dpte_row_bytes = DPTEBytesPerRow; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); + dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); + dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); + dml2_printf("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); + dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime); + dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); + dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); + dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); + dml2_printf("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded); + dml2_printf("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded); + dml2_printf("DML::%s: vm_bytes = %f\n", __func__, vm_bytes); + dml2_printf("DML::%s: 
DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); + dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes); + dml2_printf("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes); + dml2_printf("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height); + dml2_printf("DML::%s: meta_row_height = %d\n", __func__, meta_row_height); + dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); +#endif + + if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) { + if (l->dual_plane) { + if (dcc_mrq_enable & GPUVMEnable) { + l->min_row_height = math_min2(dpte_row_height, meta_row_height); + l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma); + } else if (GPUVMEnable) { + l->min_row_height = dpte_row_height; + l->min_row_height_chroma = dpte_row_height_chroma; + } else { + l->min_row_height = meta_row_height; + l->min_row_height_chroma = meta_row_height_chroma; + } + l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma); + } else { + if (dcc_mrq_enable & GPUVMEnable) + l->min_row_height = math_min2(dpte_row_height, meta_row_height); + else if (GPUVMEnable) + l->min_row_height = dpte_row_height; + else + l->min_row_height = meta_row_height; + + l->min_row_time = l->min_row_height * LineTime / VRatio; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: min_row_time = %f\n", __func__, l->min_row_time); +#endif + dml2_assert(l->min_row_time > 0); + + if (use_lb_flip_bw) { + // For mode check, calculation the flip bw requirement with worst case flip time + l->max_flip_time = math_min2(l->min_row_time, math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); + + //The lower bound on flip bandwidth + // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required + l->lb_flip_bw = 0; + + if (iflip_enable) { + l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor; + l->num_rows = 2; + l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes); + l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes; + l->lb_flip_bw = math_max3( + l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip), + l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded), + l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); + dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); + dml2_printf("DML::%s: total row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_row_bytes); + dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); + dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); + dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); + dml2_printf("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); + + if (l->lb_flip_bw > 0) { + dml2_printf("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); + 
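+ /*
+  * These mode-support estimates are the VM and row fetch times (and the
+  * equivalent dst_y figures in display lines) implied by the bandwidth-based
+  * lower bound lb_flip_bw computed above.
+  */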
dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); + dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); + dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + } +#endif + l->lb_flip_bw = math_max3(l->lb_flip_bw, + l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip, + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); + dml2_printf("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); +#endif + } + + *final_flip_bw = l->lb_flip_bw; + + *dst_y_per_vm_flip = 1; // not used + *dst_y_per_row_flip = 1; // not used + *ImmediateFlipSupportedForPipe = true; + } else { + if (iflip_enable) { + l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); + dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + dml2_printf("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); +#endif + if (l->ImmediateFlipBW == 0) { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + } else { + l->Tvm_flip = math_max3(Tvm_trips_flip, + Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, + LineTime / 4.0); + + l->Tr0_flip = math_max3(Tr0_trips_flip, + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, + LineTime / 4.0); + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor); + dml2_printf("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); + + dml2_printf("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); + dml2_printf("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip); +#endif + *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0; + *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0; + + *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime), + (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime)); + + if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) { + *ImmediateFlipSupportedForPipe = false; + } else { + *ImmediateFlipSupportedForPipe = iflip_enable; + } + } else { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + *dst_y_per_vm_flip = 0; + *dst_y_per_row_flip = 0; + *final_flip_bw = 0; + *ImmediateFlipSupportedForPipe = iflip_enable; + } + } + } else { + l->Tvm_flip = 0; + l->Tr0_flip = 0; + *dst_y_per_vm_flip = 0; + *dst_y_per_row_flip = 0; + 
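+ /*
+  * No VM or metadata rows need to be re-fetched for an immediate flip
+  * (no flip bytes, or neither GPUVM nor DCC meta is in use), so no flip
+  * time or bandwidth is budgeted; support simply reflects iflip_enable.
+  */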
*final_flip_bw = 0; + *ImmediateFlipSupportedForPipe = iflip_enable; + } + +#ifdef __DML_VBA_DEBUG__ + if (!use_lb_flip_bw) { + dml2_printf("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); + dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); + dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); + dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + } + dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); + dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); +#endif +} + +static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p) +{ + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; + + s->TotalActiveWriteback = 0; + p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); +#endif + + p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; + p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark; + p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); + dml2_printf("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); + dml2_printf("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); + dml2_printf("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); + dml2_printf("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); + dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + dml2_printf("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); + dml2_printf("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); + dml2_printf("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); + dml2_printf("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); + dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); + dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); + 
dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); +#endif + + s->TotalActiveWriteback = 0; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + s->TotalActiveWriteback = s->TotalActiveWriteback + 1; + } + } + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + } + if (p->USRRetrainingRequired) + p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (s->TotalActiveWriteback <= 1) { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency; + } else { + p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK; + p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK; + } + + if (p->USRRetrainingRequired) + p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + + if (p->USRRetrainingRequired) + p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); + dml2_printf("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); + dml2_printf("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); + dml2_printf("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); + dml2_printf("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); +#endif + + s->TotalPixelBW = 0.0; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; + double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + + s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k] + * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz); + } + + *p->global_fclk_change_supported = true; + *p->global_dram_clock_change_supported = true; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + double pixel_clock_mhz = 
p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0; + double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio; + double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio; + double LBBitPerPixel = 57; + + s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1)); + s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, MaxLineBufferLines= %u\n", __func__, k, p->MaxLineBufferLines); + dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); + dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel); + dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio); + dml2_printf("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); +#endif + + s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz); + s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz); + + s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k]; + if (p->UnboundedRequestEnabled) { + s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW; + } + + s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; + s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k])); + s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio; + + s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz; + + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio; + } + + if (p->BytePerPixelDETC[k] > 0) { + s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k]; + s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k])); + s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c; + s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz; + if (p->NumberOfActiveSurfaces > 1) { + s->ActiveClockChangeLatencyHidingC = 
s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c; + } + s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC); + } else { + s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY; + } + + s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark; + s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark; + s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark; + + if (p->VActiveLatencyHidingMargin) + p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k]; + + p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding; + + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable) { + s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height * (double)h_total / pixel_clock_mhz) * 4.0); + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) { + s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2; + } + s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark; + + s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark; + + s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin); + s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin); + } + p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 
0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency); + + enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy; + double reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000; + + p->FCLKChangeSupport[k] = dml2_fclock_change_unsupported; + if (s->ActiveFCLKChangeLatencyMargin[k] > 0) + p->FCLKChangeSupport[k] = dml2_fclock_change_vactive; + else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency) + p->FCLKChangeSupport[k] = dml2_fclock_change_vblank; + + if (p->FCLKChangeSupport[k] == dml2_fclock_change_unsupported) + *p->global_fclk_change_supported = false; + + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_unsupported; + if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) { + if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank_and_vactive; + else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; + else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank; + } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_drr; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_svp; + else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame) + p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_full_frame; + + if (p->DRAMClockChangeSupport[k] == dml2_dram_clock_change_unsupported) + *p->global_dram_clock_change_supported = false; + + s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1)); + s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k])); + s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]); + s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + dml2_printf("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + dml2_printf("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); + dml2_printf("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); + dml2_printf("DML::%s: k=%u, 
src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); + dml2_printf("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); + dml2_printf("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, p->meta_row_height_l[k]); + dml2_printf("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); +#endif + p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l; + + if (p->BytePerPixelDETC[k] > 0) { + s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k])); + s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]); + s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k]; + + if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) + p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c)); + else + p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, p->meta_row_height_c[k]); + dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); + dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); + dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); +#endif + } + } + + bool FoundCriticalSurface = false; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface) + || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) { + FoundCriticalSurface = true; + *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency; + } + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported); + dml2_printf("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported); + dml2_printf("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); + dml2_printf("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); +#endif +} + +static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config) +{ + double bw_mbps = 0; + bw_mbps = ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0; + + return bw_mbps; +} + +static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config) +{ + double uclk_mhz = 0; + + uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0; + + return uclk_mhz; +} + +static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params) +{ + unsigned int i; + unsigned int index = 0; + + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + + if (i == 0) + index = 0; + else + index = i - 1; + + if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || + 
per_uclk_dpm_params[i].minimum_uclk_khz == 0) { + break; + } + } +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, index); +#endif + return index; +} + +static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) +{ + unsigned int i; + bool clk_entry_found = 0; + + for (i = 0; i < clk_table->uclk.num_clk_values; i++) { + dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + + if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { + clk_entry_found = 1; + break; + } + } + + dml2_assert(clk_entry_found); +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, i); +#endif + return i; +} + +static unsigned int get_pipe_flip_bytes( + double hostvm_inefficiency_factor, + unsigned int vm_bytes, + unsigned int dpte_row_bytes, + unsigned int meta_row_bytes) +{ + unsigned int flip_bytes = 0; + + flip_bytes += (unsigned int)((vm_bytes * hostvm_inefficiency_factor) + 2 * meta_row_bytes); + flip_bytes += (unsigned int)(2 * dpte_row_bytes * hostvm_inefficiency_factor); + + return flip_bytes; +} + +static void calculate_hostvm_inefficiency_factor( + double *HostVMInefficiencyFactor, + double *HostVMInefficiencyFactorPrefetch, + + bool gpuvm_enable, + bool hostvm_enable, + unsigned int remote_iommu_outstanding_translations, + unsigned int max_outstanding_reqs, + double urg_bandwidth_avail_active_pixel_and_vm, + double urg_bandwidth_avail_active_vm_only) +{ + *HostVMInefficiencyFactor = 1; + *HostVMInefficiencyFactorPrefetch = 1; + + if (gpuvm_enable && hostvm_enable) { + *HostVMInefficiencyFactor = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only; + *HostVMInefficiencyFactorPrefetch = *HostVMInefficiencyFactor; + + if ((*HostVMInefficiencyFactorPrefetch < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs)) + *HostVMInefficiencyFactorPrefetch = 4; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm); + dml2_printf("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only); + dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor); + dml2_printf("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch); +#endif + } +} + +static void CalculatePixelDeliveryTimes( + const struct dml2_display_cfg *display_cfg, + const struct core_display_cfg_support_info *cfg_support_info, + unsigned int NumberOfActiveSurfaces, + double VRatioPrefetchY[], + double VRatioPrefetchC[], + unsigned int swath_width_luma_ub[], + unsigned int swath_width_chroma_ub[], + double PSCL_THROUGHPUT[], + double PSCL_THROUGHPUT_CHROMA[], + double Dppclk[], + unsigned int BytePerPixelC[], + unsigned int req_per_swath_ub_l[], + unsigned int req_per_swath_ub_c[], + + // Output + double DisplayPipeLineDeliveryTimeLuma[], + double DisplayPipeLineDeliveryTimeChroma[], + double DisplayPipeLineDeliveryTimeLumaPrefetch[], + double DisplayPipeLineDeliveryTimeChromaPrefetch[], + double DisplayPipeRequestDeliveryTimeLuma[], + double DisplayPipeRequestDeliveryTimeChroma[], + double DisplayPipeRequestDeliveryTimeLumaPrefetch[], + double 
DisplayPipeRequestDeliveryTimeChromaPrefetch[]) +{ + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + dml2_printf("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + dml2_printf("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio); + dml2_printf("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); + dml2_printf("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]); + dml2_printf("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]); + dml2_printf("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); + dml2_printf("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); + dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); + dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); + dml2_printf("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used); + dml2_printf("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz); + dml2_printf("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); +#endif + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChroma[k] = 0; + } else { + if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) { + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } + + if (VRatioPrefetchY[k] <= 1) { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; + } + + if (BytePerPixelC[k] == 0) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (VRatioPrefetchC[k] <= 1) { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz; + } else { + DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; + } + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u : 
DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); +#endif + } + + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + + DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k]; + DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k]; + if (BytePerPixelC[k] == 0) { + DisplayPipeRequestDeliveryTimeChroma[k] = 0; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; + } else { + DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k]; + DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k]; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); + dml2_printf("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); + dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); + dml2_printf("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]); +#endif + } +} + +static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p) +{ + unsigned int meta_chunk_width; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int meta_chunks_per_row_ub; + unsigned int meta_chunk_width_chroma; + unsigned int min_meta_chunk_width_chroma; + unsigned int meta_chunk_per_row_int_chroma; + unsigned int meta_row_remainder_chroma; + unsigned int meta_chunk_threshold_chroma; + unsigned int meta_chunks_per_row_ub_chroma; + unsigned int dpte_group_width_luma; + unsigned int dpte_groups_per_row_luma_ub; + unsigned int dpte_group_width_chroma; + unsigned int dpte_groups_per_row_chroma_ub; + double pixel_clock_mhz; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_META_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + } + + for (unsigned 
int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) { + meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k]; + min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k]; + meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width; + meta_row_remainder = p->meta_row_width[k] % meta_chunk_width; + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k]; + } else { + meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k]; + } + if (meta_row_remainder <= meta_chunk_threshold) { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + } else { + meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + } + p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio * + p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / + (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub; + if (p->BytePerPixelC[k] == 0) { + p->TimePerChromaMetaChunkNominal[k] = 0; + p->TimePerChromaMetaChunkVBlank[k] = 0; + p->TimePerChromaMetaChunkFlip[k] = 0; + } else { + meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k]; + min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k]; + meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma); + meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma; + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k]; + } else { + meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k]; + } + if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; + } else { + meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; + } + p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * 
p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma; + } + } else { + p->TimePerMetaChunkNominal[k] = 0; + p->TimePerMetaChunkVBlank[k] = 0; + p->TimePerMetaChunkFlip[k] = 0; + p->TimePerChromaMetaChunkNominal[k] = 0; + p->TimePerChromaMetaChunkVBlank[k] = 0; + p->TimePerChromaMetaChunkFlip[k] = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]); + dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]); + dml2_printf("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]); + dml2_printf("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]); + dml2_printf("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]); + dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]); + dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]); + dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]); +#endif + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + if (p->BytePerPixelC[k] == 0) { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0; + } else { + p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + } + } + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k]; + else + p->time_per_tdlut_group[k] = 0; + + dml2_printf("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]); + + if (p->display_cfg->gpuvm_enable == true) { + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]); + } else { + dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]); + } + if (p->use_one_row_for_frame[k]) { + dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0)); + } else { + dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / 
(double)dpte_group_width_luma, 1.0)); + } + + dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); + dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); + dml2_printf("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]); + dml2_printf("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]); + dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + dml2_printf("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); + dml2_printf("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); + + p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; + if (p->BytePerPixelC[k] == 0) { + p->time_per_pte_group_nom_chroma[k] = 0; + p->time_per_pte_group_vblank_chroma[k] = 0; + p->time_per_pte_group_flip_chroma[k] = 0; + } else { + if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { + dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]); + } else { + dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]); + } + + if (p->use_one_row_for_frame[k]) { + dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0)); + } else { + dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); + } + dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); + dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); + + p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; + } + } else { + p->time_per_pte_group_nom_luma[k] = 0; + p->time_per_pte_group_vblank_luma[k] = 0; + p->time_per_pte_group_flip_luma[k] = 0; + 
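/* gpuvm is disabled for this plane: no PTE fetches are issued, so the
+ * chroma per-PTE-group times are left at zero as well. */
+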
p->time_per_pte_group_nom_chroma[k] = 0; + p->time_per_pte_group_vblank_chroma[k] = 0; + p->time_per_pte_group_flip_chroma[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]); + dml2_printf("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]); + + dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]); + dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]); + dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]); +#endif + } +} // CalculateMetaAndPTETimes + +static void CalculateVMGroupAndRequestTimes( + const struct dml2_display_cfg *display_cfg, + unsigned int NumberOfActiveSurfaces, + unsigned int BytePerPixelC[], + double dst_y_per_vm_vblank[], + double dst_y_per_vm_flip[], + unsigned int dpte_row_width_luma_ub[], + unsigned int dpte_row_width_chroma_ub[], + unsigned int vm_group_bytes[], + unsigned int dpde0_bytes_per_frame_ub_l[], + unsigned int dpde0_bytes_per_frame_ub_c[], + unsigned int tdlut_pte_bytes_per_frame[], + unsigned int meta_pte_bytes_per_frame_ub_l[], + unsigned int meta_pte_bytes_per_frame_ub_c[], + bool mrq_present, + + // Output + double TimePerVMGroupVBlank[], + double TimePerVMGroupFlip[], + double TimePerVMRequestVBlank[], + double TimePerVMRequestFlip[]) +{ + unsigned int num_group_per_lower_vm_stage = 0; + unsigned int num_req_per_lower_vm_stage = 0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); +#endif + for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { + double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable); + dml2_printf("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); + dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); + dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); + dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); + dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); +#endif + + if (display_cfg->gpuvm_enable) { + if (display_cfg->gpuvm_max_page_table_levels >= 2) { + num_group_per_lower_vm_stage += (unsigned int)math_ceil2((double)(dpde0_bytes_per_frame_ub_l[k]) / (double)(vm_group_bytes[k]), 1); + + if (BytePerPixelC[k] > 0) + 
num_group_per_lower_vm_stage += (unsigned int)math_ceil2((double)(dpde0_bytes_per_frame_ub_c[k]) / (double)(vm_group_bytes[k]), 1); + } + + if (dcc_mrq_enable) { + if (BytePerPixelC[k] > 0) { + num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double)(meta_pte_bytes_per_frame_ub_l[k]) / (double)(vm_group_bytes[k]), 1) + + math_ceil2((double)(meta_pte_bytes_per_frame_ub_c[k]) / (double)(vm_group_bytes[k]), 1)); + } else { + num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double)(meta_pte_bytes_per_frame_ub_l[k]) / (double)(vm_group_bytes[k]), 1)); + } + } + + unsigned int num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage; + unsigned int num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage; + + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) { + num_group_per_lower_vm_stage_pref += (unsigned int)math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1); + if (display_cfg->gpuvm_max_page_table_levels >= 2) + num_group_per_lower_vm_stage_pref += 1; // tdpe0 group + } + + if (display_cfg->gpuvm_max_page_table_levels >= 2) { + num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64; + if (BytePerPixelC[k] > 0) + num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k]; + } + + if (dcc_mrq_enable) { + num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64; + if (BytePerPixelC[k] > 0) + num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64; + } + + unsigned int num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage; + unsigned int num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage; + + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) { + num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64; + } + + double line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; + + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; + TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; + TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; + TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip; + + dml2_printf("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); + dml2_printf("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); + dml2_printf("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); + dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %f\n", __func__, k, num_group_per_lower_vm_stage_pref); + dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %f\n", __func__, k, num_group_per_lower_vm_stage_flip); + dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %f\n", __func__, k, num_req_per_lower_vm_stage_pref); + dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %f\n", __func__, k, num_req_per_lower_vm_stage_flip); + + if (display_cfg->gpuvm_max_page_table_levels > 2) { + TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; + TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; + TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; + TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; + } + + } else { + TimePerVMGroupVBlank[k] = 0; + TimePerVMGroupFlip[k] = 0; + TimePerVMRequestVBlank[k] = 0; + TimePerVMRequestFlip[k] 
= 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); + dml2_printf("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); + dml2_printf("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); + dml2_printf("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); +#endif + } +} + +static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CalculateStutterEfficiency_params *p) +{ + struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals; + + memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) { + if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) { + l->MaximumEffectiveCompressionLuma = 2; + } else { + l->MaximumEffectiveCompressionLuma = 4; + } + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + dml2_printf("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); + dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma); +#endif + l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0; + l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma; + + if (p->ReadBandwidthSurfaceChroma[k] > 0) { + if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) { + l->MaximumEffectiveCompressionChroma = 2; + } else { + l->MaximumEffectiveCompressionChroma = 4; + } + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); + dml2_printf("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); 
+ dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); +#endif + l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1; + l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma; + } + } else { + l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k]; + } + l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]); + } + } + + l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth; + l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); + dml2_printf("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); + dml2_printf("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); + dml2_printf("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); + dml2_printf("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); + dml2_printf("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma); + dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); + dml2_printf("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); + + dml2_printf("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); + dml2_printf("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); + dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); + dml2_printf("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); +#endif + if (l->AverageDCCZeroSizeFraction == 1) { + l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; + l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate; + + + } else if (l->AverageDCCZeroSizeFraction > 0) { + l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; + l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, + (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) + + (p->rob_alloc_compressed ? 
math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate, + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)) + : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); + + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); + dml2_printf("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); + dml2_printf("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)); +#endif + } else { + l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, + (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) + + ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); +#endif + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); + dml2_printf("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); + dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); +#endif + + bool FoundCriticalSurface = false; + *p->StutterPeriod = 0; + + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? 
l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k]; + l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]); + l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); + dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); + dml2_printf("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); + dml2_printf("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); + dml2_printf("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + dml2_printf("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); +#endif + + if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) { + bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP; + + FoundCriticalSurface = true; + *p->StutterPeriod = l->DETBufferingTimeY; + l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + l->VActiveTimeCriticalSurface = (isInterlaceTiming ? 
math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
+ l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
+ l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
+ l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
+ l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
+ l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
+ l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
+ l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
+ dml2_printf("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
+ dml2_printf("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
+ dml2_printf("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, l->FrameTimeCriticalSurface);
+ dml2_printf("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
+ dml2_printf("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
+ dml2_printf("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
+ dml2_printf("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
+ dml2_printf("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
+ dml2_printf("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
+ dml2_printf("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
+#endif
+ }
+ }
+ }
+
+ // for bounded req, the stutter period is calculated based only on DET size, but during a burst there can be some return inside the ROB/compressed buffer
+ // the stutter period is calculated only on the det sizing
+ // if (cdb + rob >= det) the stutter burst will be absorbed by the cdb + rob, which is before decompress
+ // else
+ // the cdb + rob part will be in compressed rate with urg bw (ideal bw)
+ // the det part will be returned at uncompressed rate with 64B/dcfclk
+ //
+ // for unbounded req, the stutter period should be calculated as the total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer"
+ // should be == EffectiveCompressedBufferSize, which will be returned at a compressed rate; the rest of the stutter period is from the DET and will be returned at uncompressed rate with 64B/dcfclk
+
+ l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
+ dml2_printf("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0);
+ dml2_printf("DML::%s:
EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); + dml2_printf("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); + dml2_printf("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); + dml2_printf("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); + dml2_printf("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); + dml2_printf("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); +#endif + + l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer + / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) + / math_max2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); + dml2_printf("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); + dml2_printf("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); + dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); +#endif + + l->TotalActiveWriteback = 0; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable) { + l->TotalActiveWriteback = l->TotalActiveWriteback + 1; + } + } + + if (l->TotalActiveWriteback == 0) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); + dml2_printf("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); + dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); +#endif + *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100; + *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100; + *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); + *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
(unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0); + } else { + *p->StutterEfficiencyNotIncludingVBlank = 0.; + *p->Z8StutterEfficiencyNotIncludingVBlank = 0.; + *p->NumberOfStutterBurstsPerFrame = 0; + *p->Z8NumberOfStutterBurstsPerFrame = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); + dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + dml2_printf("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); + dml2_printf("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); + dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); +#endif + + unsigned int TotalNumberOfActiveOTG = 0; + double SinglePixelClock = 0; + unsigned int SingleHTotal = 0; + unsigned int SingleVTotal = 0; + bool SameTiming = true; + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { + if (p->display_cfg->plane_descriptors[k].stream_index == k) { + if (TotalNumberOfActiveOTG == 0) { + SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total; + SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total; + } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) || SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total || SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) { + SameTiming = false; + } + TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + } + } + } + + if (*p->StutterEfficiencyNotIncludingVBlank > 0) { + if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { + *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank; + } else { + *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; + } + } else { + *p->StutterEfficiency = 0; + *p->NumberOfStutterBurstsPerFrame = 0; + } + + double LastZ8StutterPeriod = 0.0; + + if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) { + LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; + if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { + *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank; + } else { + *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100; + } + } else { + *p->Z8StutterEfficiency = 0.; + *p->Z8NumberOfStutterBurstsPerFrame = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); + dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, 
p->Z8StutterEnterPlusExitWatermark); + dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); + dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); + dml2_printf("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); + dml2_printf("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); + dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); +#endif + + + unsigned int SwathSizeCriticalSurface; + unsigned int LastChunkOfSwathSize; + unsigned int MissingPartOfLastSwathOfDETSize; + + SwathSizeCriticalSurface = (unsigned int)(l->BytePerPixelYCriticalSurface * l->SwathHeightYCriticalSurface * math_ceil2(l->SwathWidthYCriticalSurface, l->BlockWidth256BytesYCriticalSurface)); + LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024); + MissingPartOfLastSwathOfDETSize = (unsigned int)(math_ceil2(l->DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - l->DETBufferSizeYCriticalSurface); + + *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) && + (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface); + dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); + dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); + dml2_printf("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize); + dml2_printf("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize); + dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); +#endif +} + +bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params) +{ + const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; + const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; + const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info; + struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; + struct dml2_display_cfg_programming *programming = in_out_params->programming; + + struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals; + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; + struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params 
*CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; + struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; + struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params; + + unsigned int j, k; + + dml2_printf("DML::%s: --- START --- \n", __func__); + + memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program)); + + s->num_active_planes = display_cfg->num_planes; + get_stream_output_bpp(s->OutputBpp, display_cfg); + + mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); + dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); + + mode_lib->mp.Dcfclk = programming->min_clocks.dcn4.active.dcfclk_khz / 1000.0; + mode_lib->mp.FabricClock = programming->min_clocks.dcn4.active.fclk_khz / 1000.0; + mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); + mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4.active.uclk_khz / 1000.0; + mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4.dpprefclk_khz / 1000.0; + s->SOCCLK = (double)programming->min_clocks.dcn4.socclk_khz / 1000; + mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); + mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table); + + for (k = 0; k < s->num_active_planes; ++k) { + switch (cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].odms_used) { + case (4): + if (cfg_support_info->plane_support_info[k].dpps_used == 1) + mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2; // FIXME_STAGE2: for mode programming same as dml2_odm_mode_split_1to2? 
+ else + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1; + break; + case (3): + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1; + break; + case (2): + if (cfg_support_info->plane_support_info[k].dpps_used == 1) + mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4; + else + mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1; + break; + default: + mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass; + break; + } + } + + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; + mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4.dppclk_khz / 1000.0; + dml2_assert(mode_lib->mp.Dppclk[k] > 0); + } + + for (k = 0; k < s->num_active_planes; ++k) { + unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4.dscclk_khz / 1000.0; + dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); + } + + mode_lib->mp.Dispclk = programming->min_clocks.dcn4.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4.deepsleep_dcfclk_khz / 1000.0; + + dml2_assert(mode_lib->mp.Dcfclk > 0); + dml2_assert(mode_lib->mp.FabricClock > 0); + dml2_assert(mode_lib->mp.dram_bw_mbps > 0); + dml2_assert(mode_lib->mp.uclk_freq_mhz > 0); + dml2_assert(mode_lib->mp.GlobalDPPCLK > 0); + dml2_assert(mode_lib->mp.Dispclk > 0); + dml2_assert(mode_lib->mp.DCFCLKDeepSleep > 0); + dml2_assert(s->SOCCLK > 0); + +#ifdef __DML_VBA_DEBUG__ + // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, s->num_active_planes); + // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, s->num_active_planes); + // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, s->num_active_planes); + // dml2_printf_dml_display_cfg_output(&display_cfg->output, s->num_active_planes); + // dml2_printf_dml_display_cfg_hw_resource(&display_cfg->hw, s->num_active_planes); + + dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); + dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); + dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); + dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); + dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps); + dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); + dml2_printf("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); + for (k = 0; k < s->num_active_planes; ++k) { + dml2_printf("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); + } + dml2_printf("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); + dml2_printf("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); + dml2_printf("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); + dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + dml2_printf("DML::%s: min_clk_table min_fclk_khz = %d\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); + dml2_printf("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); + for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) { + dml2_printf("DML::%s: pipe=%d is in plane=%d\n", 
__func__, k, mode_lib->mp.pipe_plane[k]); + dml2_printf("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + } + + for (k = 0; k < s->num_active_planes; k++) + dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); +#endif + + CalculateMaxDETAndMinCompressedBufferSize( + mode_lib->ip.config_return_buffer_size_in_kbytes, + mode_lib->ip.config_return_buffer_segment_size_in_kbytes, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->ip.max_num_dpp, + display_cfg->overrides.hw.force_nom_det_size_kbytes.enable, + display_cfg->overrides.hw.force_nom_det_size_kbytes.value, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + &s->MaxTotalDETInKByte, + &s->NomDETInKByte, + &s->MinCompressedBufferSizeInKByte); + + + PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd); + + for (k = 0; k < s->num_active_planes; ++k) { + CalculateSinglePipeDPPCLKAndSCLThroughput( + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ip.max_dchub_pscl_bw_pix_per_clk, + mode_lib->ip.max_pscl_lb_bw_pix_per_clk, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps, + + /* Output */ + &mode_lib->mp.PSCL_THROUGHPUT[k], + &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k], + &mode_lib->mp.DPPCLKUsingSingleDPP[k]); + } + + for (k = 0; k < s->num_active_planes; ++k) { + CalculateBytePerPixelAndBlockSizes( + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].surface.tiling, + display_cfg->plane_descriptors[k].surface.plane0.pitch, + display_cfg->plane_descriptors[k].surface.plane1.pitch, + + // Output + &mode_lib->mp.BytePerPixelY[k], + &mode_lib->mp.BytePerPixelC[k], + &mode_lib->mp.BytePerPixelInDETY[k], + &mode_lib->mp.BytePerPixelInDETC[k], + &mode_lib->mp.Read256BlockHeightY[k], + &mode_lib->mp.Read256BlockHeightC[k], + &mode_lib->mp.Read256BlockWidthY[k], + &mode_lib->mp.Read256BlockWidthC[k], + &mode_lib->mp.MacroTileHeightY[k], + &mode_lib->mp.MacroTileHeightC[k], + &mode_lib->mp.MacroTileWidthY[k], + &mode_lib->mp.MacroTileWidthC[k], + &mode_lib->mp.surf_linear128_l[k], + &mode_lib->mp.surf_linear128_c[k]); + } + + CalculateSwathWidth( + display_cfg, + false, // ForceSingleDPP + s->num_active_planes, + mode_lib->mp.ODMMode, + mode_lib->mp.BytePerPixelY, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.Read256BlockHeightY, + mode_lib->mp.Read256BlockHeightC, + mode_lib->mp.Read256BlockWidthY, + mode_lib->mp.Read256BlockWidthC, + mode_lib->mp.surf_linear128_l, + mode_lib->mp.surf_linear128_c, + mode_lib->mp.NoOfDPP, + + /* Output */ + mode_lib->mp.req_per_swath_ub_l, + mode_lib->mp.req_per_swath_ub_c, + mode_lib->mp.SwathWidthSingleDPPY, + mode_lib->mp.SwathWidthSingleDPPC, + mode_lib->mp.SwathWidthY, + mode_lib->mp.SwathWidthC, + 
s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[] + s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[] + mode_lib->mp.swath_width_luma_ub, + mode_lib->mp.swath_width_chroma_ub); + + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); + mode_lib->mp.SurfaceReadBandwidthLuma[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + mode_lib->mp.SurfaceReadBandwidthChroma[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + dml2_printf("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); + dml2_printf("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); + } + + CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte; + CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes; + + CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false; + CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte; + CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.SurfaceReadBandwidthLuma; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.SurfaceReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0]; + CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1]; + 
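/* several outputs below are routed into s->dummy_* scratch members because
+ * this programming pass does not consume them any further */
+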
CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY; + CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC; + CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l; + CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c; + CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode; + CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP; + CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY; + CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC; + CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY; + CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC; + + // output + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l; + CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c; + CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0]; + CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1]; + CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2]; + CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3]; + CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY; + CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC; + CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma; + CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma; + CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte; + CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; + CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l; + CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c; + CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled; + CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b; + CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5; + CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0]; + CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0]; + CalculateSwathAndDETConfiguration_params->funcs = &mode_lib->funcs; + + // VBA_DELTA + // Calculate DET size, swath height here. 
In VBA, they are calculated in mode check stage + CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params); + + // DSCCLK + /* + s->DSCFormatFactor = 0; + for (k = 0; k < s->num_active_planes; ++k) { + if ((display_cfg->plane_descriptors[k].stream_index != k) || !cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable) { + } else { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) + s->DSCFormatFactor = 2; + else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) + s->DSCFormatFactor = 1; + else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) + s->DSCFormatFactor = 2; + else + s->DSCFormatFactor = 1; + + s->PixelClockBackEndFactor = 3.0; + + if (mode_lib->mp.ODMMode[k] == dml2_odm_mode_combine_4to1) + s->PixelClockBackEndFactor = 12.0; + else if (mode_lib->mp.ODMMode[k] == dml2_odm_mode_combine_3to1) + s->PixelClockBackEndFactor = 9.0; + else if (mode_lib->mp.ODMMode[k] == dml2_odm_mode_combine_2to1) + s->PixelClockBackEndFactor = 6.0; + + } + #ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, DSCEnabled = %u\n", __func__, k, cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable); + dml2_printf("DML::%s: k=%u, BlendingAndTiming = %u\n", __func__, k, display_cfg->plane_descriptors[k].stream_index); + dml2_printf("DML::%s: k=%u, PixelClockBackEndFactor = %f\n", __func__, k, s->PixelClockBackEndFactor); + dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); + dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); + dml2_printf("DML::%s: k=%u, DSCCLK = %f\n", __func__, k, mode_lib->mp.DSCCLK[k]); + #endif + } + */ + + // DSC Delay + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable, + mode_lib->mp.ODMMode[k], + mode_lib->ip.maximum_dsc_bits_per_component, + s->OutputBpp[k], + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, + cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder, + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + s->PixelClockBackEnd[k]); + } + + for (k = 0; k < s->num_active_planes; ++k) + for (j = 0; j < s->num_active_planes; ++j) // NumberOfSurfaces + if (j != k && display_cfg->plane_descriptors[k].stream_index == j && cfg_support_info->stream_support_info[display_cfg->plane_descriptors[j].stream_index].dsc_enable) + mode_lib->mp.DSCDelay[k] = mode_lib->mp.DSCDelay[j]; + + // Prefetch + if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) { + for (k = 0; k < s->num_active_planes; ++k) + mode_lib->mp.SurfaceSizeInTheMALL[k] = 0; + } else { + 
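/* MALL is carved out for DCN: size each surface against it; the result
+ * feeds SurfaceSizeInMALL in the CalculateVMRowAndSwath call below. */
+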
CalculateSurfaceSizeInMall( + display_cfg, + s->num_active_planes, + mode_lib->soc.mall_allocated_for_dcn_mbytes, + mode_lib->mp.BytePerPixelY, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.Read256BlockWidthY, + mode_lib->mp.Read256BlockWidthC, + mode_lib->mp.Read256BlockHeightY, + mode_lib->mp.Read256BlockHeightC, + mode_lib->mp.MacroTileWidthY, + mode_lib->mp.MacroTileWidthC, + mode_lib->mp.MacroTileHeightY, + mode_lib->mp.MacroTileHeightC, + + /* Output */ + mode_lib->mp.SurfaceSizeInTheMALL, + &s->dummy_boolean[0]); /* bool *ExceededMALLSize */ + } + + for (k = 0; k < s->num_active_planes; ++k) { + s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k]; + s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k]; + s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k]; + s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k]; + s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k]; + s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k]; + s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k]; + s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k]; + s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k]; + s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling; + s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k]; + s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k]; + s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch; + s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch; + s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; + s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; + s->SurfaceParameters[k].ViewportYStart = 
display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; + s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start; + s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame; + s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k]; + s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k]; + s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch; + s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch; + } + + CalculateVMRowAndSwath_params->display_cfg = display_cfg; + CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters; + CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; + CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma; + CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes; + CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY; + CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes; + CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + // output + CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; + CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub; + CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub; + CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height; + CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma; + CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear; + CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma; + CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes; + CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; + CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY; + CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY; + CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY; + CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC; + CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC; + CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC; + CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y; + CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y; + CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c; + CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c; + CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l; + 
CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c; + CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY; + CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC; + CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY; + CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC; + CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY; + CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC; + CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; + CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow; + CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes; + CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame; + CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip; + CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss; + CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE; + CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE; + CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1]; + CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw; + CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes; + CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width; + CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height; + CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width; + CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l; + CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma; + CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma; + CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma; + CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma; + CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c; + + CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params); + + memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params)); + if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) { + for (k = 0; k < s->num_active_planes; k++) { + mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0; + mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0; + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0; + mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0; + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0; + mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0; + } + } else { + for (k = 0; k < s->num_active_planes; k++) { + calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count; + calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes; + calculate_mcache_setting_params->mcache_size_bytes = 
mode_lib->soc.mcache_size_bytes; + calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes; + calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + + calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format; + calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle); + calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary; + calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling; + calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall; + + calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start; + calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start; + calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width; + calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height; + calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k]; + calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k]; + calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k]; + calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k]; + calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k]; + calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k]; + + calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start; + calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start; + calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width; + calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height; + calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k]; + calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k]; + calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k]; + calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k]; + calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k]; + calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k]; + + // output + calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k]; + calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k]; + calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k]; + calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k]; + + calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k]; + calculate_mcache_setting_params->mcache_row_bytes_l 
= &mode_lib->mp.mcache_row_bytes_l[k]; + calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k]; + calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k]; + + calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k]; + calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k]; + calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k]; + calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k]; + + calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k]; + calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k]; + calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k]; + calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params); + } + + calculate_mall_bw_overhead_factor( + mode_lib->mp.mall_prefetch_sdp_overhead_factor, + mode_lib->mp.mall_prefetch_dram_overhead_factor, + + // input + display_cfg, + s->num_active_planes); + } + + // Calculate all the bandwidth available + calculate_bandwidth_available( + mode_lib->mp.avg_bandwidth_available_min, + mode_lib->mp.avg_bandwidth_available, + mode_lib->mp.urg_bandwidth_available_min, + mode_lib->mp.urg_bandwidth_available, + mode_lib->mp.urg_bandwidth_available_vm_only, + mode_lib->mp.urg_bandwidth_available_pixel_and_vm, + + &mode_lib->soc, + display_cfg->hostvm_enable, + mode_lib->mp.Dcfclk, + mode_lib->mp.FabricClock, + mode_lib->mp.dram_bw_mbps); + + + calculate_hostvm_inefficiency_factor( + &s->HostVMInefficiencyFactor, + &s->HostVMInefficiencyFactorPrefetch, + + display_cfg->gpuvm_enable, + display_cfg->hostvm_enable, + mode_lib->ip.remote_iommu_outstanding_translations, + mode_lib->soc.max_outstanding_reqs, + mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], + mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); + + s->TotalDCCActiveDPP = 0; + s->TotalActiveDPP = 0; + for (k = 0; k < s->num_active_planes; ++k) { + s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k]; + if (display_cfg->plane_descriptors[k].surface.dcc.enable) + s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k]; + } + // Calculate tdlut schedule related terms + for (k = 0; k <= s->num_active_planes - 1; k++) { + calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk; + calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; + calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; + calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; + calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + + // output + calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; + calculate_tdlut_setting_params->tdlut_opt_time = 
&s->tdlut_opt_time[k]; + calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; + + calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); + } + + CalculateExtraLatency( + display_cfg, + mode_lib->ip.rob_buffer_size_kbytes, + 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + s->ReorderBytes, + mode_lib->mp.Dcfclk, + mode_lib->mp.FabricClock, + mode_lib->ip.pixel_chunk_size_kbytes, + mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active], + s->num_active_planes, + mode_lib->mp.NoOfDPP, + mode_lib->mp.dpte_group_bytes, + s->tdlut_bytes_per_group, + s->HostVMInefficiencyFactor, + s->HostVMInefficiencyFactorPrefetch, + mode_lib->soc.hostvm_min_page_size_kbytes, + mode_lib->soc.qos_parameters.qos_type, + !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), + mode_lib->soc.max_outstanding_reqs, + mode_lib->mp.request_size_bytes_luma, + mode_lib->mp.request_size_bytes_chroma, + mode_lib->ip.meta_chunk_size_kbytes, + mode_lib->ip.dchub_arb_to_ret_delay, + mode_lib->mp.TripToMemory, + mode_lib->ip.hostvm_mode, + + // output + &mode_lib->mp.ExtraLatency, + &mode_lib->mp.ExtraLatency_sr, + &mode_lib->mp.ExtraLatencyPrefetch); + + mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep; + + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].stream_index == k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + mode_lib->mp.WritebackDelay[k] = + mode_lib->soc.qos_parameters.writeback.base_latency_us + + CalculateWriteBackDelay( + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk; + } else + mode_lib->mp.WritebackDelay[k] = 0; + + for (j = 0; j < s->num_active_planes; ++j) { + if (display_cfg->plane_descriptors[j].stream_index == k + && display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.enable == true) { + mode_lib->mp.WritebackDelay[k] = + math_max2( + mode_lib->mp.WritebackDelay[k], + mode_lib->soc.qos_parameters.writeback.base_latency_us + + CalculateWriteBackDelay( + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.v_taps, + 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[j].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk); + } + } + } + } + + for (k = 0; k < s->num_active_planes; ++k) + for (j = 0; j < s->num_active_planes; ++j) + if (display_cfg->plane_descriptors[k].stream_index == j) + mode_lib->mp.WritebackDelay[k] = mode_lib->mp.WritebackDelay[j]; + + mode_lib->mp.UrgentLatency = CalculateUrgentLatency( + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.do_urgent_latency_adjustment, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->mp.FabricClock, + mode_lib->mp.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + + mode_lib->mp.TripToMemory = CalculateTripToMemory( + mode_lib->mp.UrgentLatency, + mode_lib->mp.FabricClock, + mode_lib->mp.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + + mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); + + mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory( + mode_lib->mp.UrgentLatency, + mode_lib->mp.FabricClock, + mode_lib->mp.uclk_freq_mhz, + mode_lib->soc.qos_parameters.qos_type, + mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.meta_trip_adder_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + + for (k = 0; k < s->num_active_planes; ++k) { + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, + + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); + + bool cursor_not_enough_urgent_latency_hiding = 0; + double 
line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->mp.UrgentLatency, + + // output + &mode_lib->mp.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k]; + + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->mp.swath_width_luma_ub[k], + mode_lib->mp.swath_width_chroma_ub[k], + mode_lib->mp.SwathHeightY[k], + mode_lib->mp.SwathHeightC[k], + line_time_us, + mode_lib->mp.UrgentLatency, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->mp.BytePerPixelInDETY[k], + mode_lib->mp.BytePerPixelInDETC[k], + mode_lib->mp.DETBufferSizeY[k], + mode_lib->mp.DETBufferSizeC[k], + + /* output */ + &mode_lib->mp.UrgentBurstFactorLuma[k], + &mode_lib->mp.UrgentBurstFactorChroma[k], + &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); + + mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding; + } + + for (k = 0; k < s->num_active_planes; ++k) { + s->MaxVStartupLines[k] = CalculateMaxVStartup( + mode_lib->ip.ptoi_supported, + mode_lib->ip.vblank_nom_default_us, + &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing, + mode_lib->mp.WritebackDelay[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + dml2_printf("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]); +#endif + } + + s->immediate_flip_required = false; + for (k = 0; k < s->num_active_planes; ++k) { + s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); +#endif + + { + s->DestinationLineTimesForPrefetchLessThan2 = false; + s->VRatioPrefetchMoreThanMax = false; + + dml2_printf("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__); + + for (k = 0; k < s->num_active_planes; ++k) { + dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + + mode_lib->mp.TWait[k] = CalculateTWait( + display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, + mode_lib->mp.UrgentLatency, + mode_lib->mp.TripToMemory); + + struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; + myPipe->Dppclk = mode_lib->mp.Dppclk[k]; + myPipe->Dispclk = mode_lib->mp.Dispclk; + myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep; + myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k]; + myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; + myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + myPipe->VRatioChroma = 
display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; + myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; + myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; + myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; + myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + myPipe->ODMMode = mode_lib->mp.ODMMode[k]; + myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); +#endif + CalculatePrefetchSchedule_params->display_cfg = display_cfg; + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k]; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k]; + CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k]; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = 
mode_lib->ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency; + CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch; + CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc; + CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k]; + CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory; + CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; + CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; + CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); + CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; + CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; + CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; + CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k]; + CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k]; + + // output + CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k]; + CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k]; + CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k]; + 
CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]; + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k]; + CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k]; + CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k]; + CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k]; + CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; + CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k]; + CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k]; + CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k]; + + mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); +#endif + mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k]; + } // for k + + mode_lib->mp.PrefetchModeSupported = true; + for (k = 0; k < s->num_active_planes; ++k) { + if (mode_lib->mp.NoTimeToPrefetch[k] == true || + mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] || + mode_lib->mp.DSTYAfterScaler[k] > 8) { + dml2_printf("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); + dml2_printf("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]); + dml2_printf("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]); + mode_lib->mp.PrefetchModeSupported = false; + } + if (mode_lib->mp.dst_y_prefetch[k] < 2) + s->DestinationLineTimesForPrefetchLessThan2 = true; + + if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || + mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) + s->VRatioPrefetchMoreThanMax = true; + + if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { + dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); + mode_lib->mp.PrefetchModeSupported = false; + } + } + + if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) { + dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + dml2_printf("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); + mode_lib->mp.PrefetchModeSupported = false; + } 
+ + dml2_printf("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__, + mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup); + + // Prefetch schedule OK, now check prefetch bw + if (mode_lib->mp.PrefetchModeSupported == true) { + for (k = 0; k < s->num_active_planes; ++k) { + double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->mp.swath_width_luma_ub[k], + mode_lib->mp.swath_width_chroma_ub[k], + mode_lib->mp.SwathHeightY[k], + mode_lib->mp.SwathHeightC[k], + line_time_us, + mode_lib->mp.UrgentLatency, + mode_lib->mp.VRatioPrefetchY[k], + mode_lib->mp.VRatioPrefetchC[k], + mode_lib->mp.BytePerPixelInDETY[k], + mode_lib->mp.BytePerPixelInDETC[k], + mode_lib->mp.DETBufferSizeY[k], + mode_lib->mp.DETBufferSizeC[k], + /* Output */ + &mode_lib->mp.UrgentBurstFactorLumaPre[k], + &mode_lib->mp.UrgentBurstFactorChromaPre[k], + &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + dml2_printf("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]); + dml2_printf("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]); + dml2_printf("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]); + dml2_printf("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]); + + dml2_printf("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]); + dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + + dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); + dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); + dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); + dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); + dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); + dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]); + dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]); + dml2_printf("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]); +#endif + } + + for (k = 0; k <= s->num_active_planes - 1; k++) + mode_lib->mp.final_flip_bw[k] = 0; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + mode_lib->mp.urg_vactive_bandwidth_required, + mode_lib->mp.urg_bandwidth_required, + mode_lib->mp.non_urg_bandwidth_required, + + // Input + display_cfg, + 0, // inc_flip_bw + s->num_active_planes, + mode_lib->mp.NoOfDPP, + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0, + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1, + 
mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0, + mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1, + mode_lib->mp.mall_prefetch_sdp_overhead_factor, + mode_lib->mp.mall_prefetch_dram_overhead_factor, + mode_lib->mp.SurfaceReadBandwidthLuma, + mode_lib->mp.SurfaceReadBandwidthChroma, + mode_lib->mp.RequiredPrefetchPixelDataBWLuma, + mode_lib->mp.RequiredPrefetchPixelDataBWChroma, + mode_lib->mp.cursor_bw, + mode_lib->mp.dpte_row_bw, + mode_lib->mp.meta_row_bw, + mode_lib->mp.prefetch_cursor_bw, + mode_lib->mp.prefetch_vmrow_bw, + mode_lib->mp.final_flip_bw, + mode_lib->mp.UrgentBurstFactorLuma, + mode_lib->mp.UrgentBurstFactorChroma, + mode_lib->mp.UrgentBurstFactorCursor, + mode_lib->mp.UrgentBurstFactorLumaPre, + mode_lib->mp.UrgentBurstFactorChromaPre, + mode_lib->mp.UrgentBurstFactorCursorPre); + + // Check urg peak bandwidth against available urg bw + // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active) + check_urgent_bandwidth_support( + &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth + &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall + &s->dummy_boolean[1], // vactive bw ok + &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok + + mode_lib->soc.mall_allocated_for_dcn_mbytes, + mode_lib->mp.non_urg_bandwidth_required, + mode_lib->mp.urg_vactive_bandwidth_required, + mode_lib->mp.urg_bandwidth_required, + mode_lib->mp.urg_bandwidth_available); + + for (k = 0; k < s->num_active_planes; ++k) { + if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) { + dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); + mode_lib->mp.PrefetchModeSupported = false; + } + } + } // prefetch schedule ok + + // Prefetch schedule and prefetch bw ok, now check flip bw + if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw + + mode_lib->mp.BandwidthAvailableForImmediateFlip = + get_bandwidth_available_for_immediate_flip(dml2_core_internal_soc_state_sys_active, + mode_lib->mp.urg_bandwidth_required, // no flip + mode_lib->mp.urg_bandwidth_available); + mode_lib->mp.TotImmediateFlipBytes = 0; + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].immediate_flip) { + s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( + s->HostVMInefficiencyFactor, + mode_lib->mp.vm_bytes[k], + mode_lib->mp.PixelPTEBytesPerRow[k], + mode_lib->mp.meta_row_bytes[k]); + } else { + s->per_pipe_flip_bytes[k] = 0; + } + mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k]; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k = %u\n", __func__, k); + dml2_printf("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]); + dml2_printf("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]); + dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]); + dml2_printf("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]); + dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes); +#endif + } + for (k = 0; k < s->num_active_planes; ++k) { + CalculateFlipSchedule( + &mode_lib->scratch, + display_cfg->plane_descriptors[k].immediate_flip, + 0, // use_lb_flip_bw + s->HostVMInefficiencyFactor, + s->Tvm_trips_flip[k], + s->Tr0_trips_flip[k], + s->Tvm_trips_flip_rounded[k], + s->Tr0_trips_flip_rounded[k], + display_cfg->gpuvm_enable, + 
mode_lib->mp.vm_bytes[k], + mode_lib->mp.PixelPTEBytesPerRow[k], + mode_lib->mp.BandwidthAvailableForImmediateFlip, + mode_lib->mp.TotImmediateFlipBytes, + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->mp.Tno_bw[k], + mode_lib->mp.dpte_row_height[k], + mode_lib->mp.dpte_row_height_chroma[k], + mode_lib->mp.use_one_row_for_frame_flip[k], + mode_lib->ip.max_flip_time_us, + s->per_pipe_flip_bytes[k], + mode_lib->mp.meta_row_bytes[k], + mode_lib->mp.meta_row_height[k], + mode_lib->mp.meta_row_height_chroma[k], + mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, + + // Output + &mode_lib->mp.dst_y_per_vm_flip[k], + &mode_lib->mp.dst_y_per_row_flip[k], + &mode_lib->mp.final_flip_bw[k], + &mode_lib->mp.ImmediateFlipSupportedForPipe[k]); + } + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + s->dummy_bw, + mode_lib->mp.urg_bandwidth_required_flip, + mode_lib->mp.non_urg_bandwidth_required_flip, + + // Input + display_cfg, + 1, // inc_flip_bw + s->num_active_planes, + mode_lib->mp.NoOfDPP, + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0, + mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1, + mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0, + mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1, + mode_lib->mp.mall_prefetch_sdp_overhead_factor, + mode_lib->mp.mall_prefetch_dram_overhead_factor, + mode_lib->mp.SurfaceReadBandwidthLuma, + mode_lib->mp.SurfaceReadBandwidthChroma, + mode_lib->mp.RequiredPrefetchPixelDataBWLuma, + mode_lib->mp.RequiredPrefetchPixelDataBWChroma, + mode_lib->mp.cursor_bw, + mode_lib->mp.dpte_row_bw, + mode_lib->mp.meta_row_bw, + mode_lib->mp.prefetch_cursor_bw, + mode_lib->mp.prefetch_vmrow_bw, + mode_lib->mp.final_flip_bw, + mode_lib->mp.UrgentBurstFactorLuma, + mode_lib->mp.UrgentBurstFactorChroma, + mode_lib->mp.UrgentBurstFactorCursor, + mode_lib->mp.UrgentBurstFactorLumaPre, + mode_lib->mp.UrgentBurstFactorChromaPre, + mode_lib->mp.UrgentBurstFactorCursorPre); + + calculate_immediate_flip_bandwidth_support( + &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip + &mode_lib->mp.ImmediateFlipSupported, // bool* flip_bandwidth_support_ok + + dml2_core_internal_soc_state_sys_active, + mode_lib->mp.urg_bandwidth_required_flip, + mode_lib->mp.non_urg_bandwidth_required_flip, + mode_lib->mp.urg_bandwidth_available); + + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->mp.ImmediateFlipSupported = false; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k); +#endif + } + } + } else { // flip or prefetch not supported + mode_lib->mp.ImmediateFlipSupported = false; + } + + // consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm + bool must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required; + mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported)); + +#ifdef __DML_VBA_DEBUG__ + 
dml2_printf("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); + for (k = 0; k < s->num_active_planes; ++k) + dml2_printf("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); + dml2_printf("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); + dml2_printf("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); +#endif + dml2_printf("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); + } + + for (k = 0; k < s->num_active_planes; ++k) + dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + + if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) { + dml2_printf("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); + } else { + dml2_printf("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); + + // DCC Configuration + for (k = 0; k < s->num_active_planes; ++k) { +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); +#endif + CalculateDCCConfiguration( + display_cfg->plane_descriptors[k].surface.dcc.enable, + display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown, + display_cfg->plane_descriptors[k].pixel_format, + display_cfg->plane_descriptors[k].surface.plane0.width, + display_cfg->plane_descriptors[k].surface.plane1.width, + display_cfg->plane_descriptors[k].surface.plane0.height, + display_cfg->plane_descriptors[k].surface.plane1.height, + s->NomDETInKByte, + mode_lib->mp.Read256BlockHeightY[k], + mode_lib->mp.Read256BlockHeightC[k], + display_cfg->plane_descriptors[k].surface.tiling, + mode_lib->mp.BytePerPixelY[k], + mode_lib->mp.BytePerPixelC[k], + mode_lib->mp.BytePerPixelInDETY[k], + mode_lib->mp.BytePerPixelInDETC[k], + display_cfg->plane_descriptors[k].composition.rotation_angle, + + /* Output */ + &mode_lib->mp.RequestLuma[k], + &mode_lib->mp.RequestChroma[k], + &mode_lib->mp.DCCYMaxUncompressedBlock[k], + &mode_lib->mp.DCCCMaxUncompressedBlock[k], + &mode_lib->mp.DCCYMaxCompressedBlock[k], + &mode_lib->mp.DCCCMaxCompressedBlock[k], + &mode_lib->mp.DCCYIndependentBlock[k], + &mode_lib->mp.DCCCIndependentBlock[k]); + } + + //Watermarks and NB P-State/DRAM Clock Change Support + s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency; + s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency; + s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr; + s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; + s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; + s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; + s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; + s->mmSOCParameters.USRRetrainingLatency = 0; //0; //FIXME_STAGE2 + 
s->mmSOCParameters.SMNLatency = 0; //mode_lib->soc.smn_latency_us; //FIXME_STAGE2 + + CalculateWatermarks_params->display_cfg = display_cfg; + CalculateWatermarks_params->USRRetrainingRequired = false/*FIXME_STAGE2 was: mode_lib->ms.policy.USRRetrainingRequired, no new dml2 replacement*/; + CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk; + CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; + CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = s->SOCCLK; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; + CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY; + CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC; + //CalculateWatermarks_params->LBBitPerPixel = 57; //FIXME_STAGE2 + CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY; + CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC; + CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; + CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height; + CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma; + + // Output + CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark; + CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport; + CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported; + CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL; + CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport; + CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported; + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport; + CalculateWatermarks_params->VActiveLatencyHidingMargin = 0; + + 
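+ /* With the QoS latencies, clocks and per-plane swath/DET parameters assembled above, evaluate the watermarks and DRAM/FCLK clock-change support for the programmed mode */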
CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); + mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark); + } else { + mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0; + mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0; + } + } + + dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); + dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); + + //Display Pipeline Delivery Time in Prefetch, Groups + CalculatePixelDeliveryTimes( + display_cfg, + cfg_support_info, + s->num_active_planes, + mode_lib->mp.VRatioPrefetchY, + mode_lib->mp.VRatioPrefetchC, + mode_lib->mp.swath_width_luma_ub, + mode_lib->mp.swath_width_chroma_ub, + mode_lib->mp.PSCL_THROUGHPUT, + mode_lib->mp.PSCL_THROUGHPUT_CHROMA, + mode_lib->mp.Dppclk, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.req_per_swath_ub_l, + mode_lib->mp.req_per_swath_ub_c, + + /* Output */ + mode_lib->mp.DisplayPipeLineDeliveryTimeLuma, + mode_lib->mp.DisplayPipeLineDeliveryTimeChroma, + mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch, + mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch, + mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma, + mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma, + mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch, + mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch); + + CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch; + CalculateMetaAndPTETimes_params->display_cfg = display_cfg; + CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame; + CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank; + CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip; + CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; + CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC; + CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height; + CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma; + CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes; + CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY; + CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC; + CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY; + 
CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY; + CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC; + CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC; + CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub; + CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub; + CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub; + CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present; + + CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes; + CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes; + CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width; + CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma; + CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height; + CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma; + CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width; + CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma; + CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height; + CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma; + + CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group; + CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L; + CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C; + CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma; + CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma; + CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma; + CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma; + CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L; + CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C; + CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal; + CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank; + CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip; + CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip; + + CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params); + + CalculateVMGroupAndRequestTimes( + display_cfg, + s->num_active_planes, + mode_lib->mp.BytePerPixelC, + mode_lib->mp.dst_y_per_vm_vblank, + mode_lib->mp.dst_y_per_vm_flip, + 
mode_lib->mp.dpte_row_width_luma_ub, + mode_lib->mp.dpte_row_width_chroma_ub, + mode_lib->mp.vm_group_bytes, + mode_lib->mp.dpde0_bytes_per_frame_ub_l, + mode_lib->mp.dpde0_bytes_per_frame_ub_c, + s->tdlut_pte_bytes_per_frame, + mode_lib->mp.meta_pte_bytes_per_frame_ub_l, + mode_lib->mp.meta_pte_bytes_per_frame_ub_c, + mode_lib->ip.dcn_mrq_present, + + /* Output */ + mode_lib->mp.TimePerVMGroupVBlank, + mode_lib->mp.TimePerVMGroupFlip, + mode_lib->mp.TimePerVMRequestVBlank, + mode_lib->mp.TimePerVMRequestFlip); + + // VStartup Adjustment + for (k = 0; k < s->num_active_planes; ++k) { + + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency; + if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable) + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k]; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); +#endif + s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); + dml2_printf("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); +#endif + + mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin; + if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) { + mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin; + } + + bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported); + + // The actual positioning of the vstartup + mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]); + + s->dlg_vblank_start = ((isInterlaceTiming ? 
math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) : + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); + s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0; + s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k]; + + if (s->blank_lines_remaining < 0) { + dml2_printf("ERROR: Vstartup is larger than vblank!?\n"); + s->blank_lines_remaining = 0; + DML2_ASSERT(0); + } + mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup; + + // debug only + if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <= + (isInterlaceTiming ? + math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) : + (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) { + mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true; + } else { + mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); + dml2_printf("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); + dml2_printf("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); + dml2_printf("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); + dml2_printf("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); + dml2_printf("DML::%s: k=%u, HTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); + dml2_printf("DML::%s: k=%u, VTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); + dml2_printf("DML::%s: k=%u, VActive = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); + dml2_printf("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); + dml2_printf("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); + 
dml2_printf("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); + dml2_printf("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); +#endif + } + + //Maximum Bandwidth Used + s->TotalWRBandwidth = 0; + s->WRBandwidth = 0; + for (k = 0; k < s->num_active_planes; ++k) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_32) { + s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4; + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8; + } + s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth; + } + + mode_lib->mp.TotalDataReadBandwidth = 0; + for (k = 0; k < s->num_active_planes; ++k) { + mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.SurfaceReadBandwidthLuma[k] + mode_lib->mp.SurfaceReadBandwidthChroma[k]; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); + dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); + dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); +#endif + } + + CalculateStutterEfficiency_params->display_cfg = display_cfg; + CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte; + CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; + CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries; + CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries; + CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes; + CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes; + CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes; + CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth; + CalculateStutterEfficiency_params->DCFCLK = 
mode_lib->mp.Dcfclk; + CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active]; + CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b; + CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs; + CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark; + CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank; + CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP; + CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY; + CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY; + CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; + CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY; + CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY; + CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC; + CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY; + CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY; + CalculateStutterEfficiency_params->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC; + CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC; + CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock; + CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.SurfaceReadBandwidthLuma; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.SurfaceReadBandwidthChroma; + CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; + CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw; + CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present; + + // output + CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency; + CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; + CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency; + CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; + CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod; + 
CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; + + // Stutter Efficiency + CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); + +#ifdef __DML_VBA_ALLOW_DELTA__ + // Calculate z8 stutter eff assuming 0 reserved space + CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0; + CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0; + + CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase; + CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase; + CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase; + CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase; + + // Stutter Efficiency + CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params); +#else + mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank; + mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency; + mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; + mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod; +#endif + } // PrefetchAndImmediateFlipSupported + + const long min_return_uclk_cycles = 83; + const long min_return_fclk_cycles = 75; + double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0; + double max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0; + double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0; + double min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz; + mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles; + mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles); + mode_lib->mp.dcfclk_deep_sleep_hysteresis = 255; + DML2_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); + dml2_printf("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); + dml2_printf("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); + dml2_printf("DML::%s: min_return_uclk_cycles = %d\n", __func__, min_return_uclk_cycles); + dml2_printf("DML::%s: min_return_fclk_cycles = %d\n", __func__, min_return_fclk_cycles); + dml2_printf("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); + dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); + dml2_printf("DML::%s: --- END --- \n", __func__); +#endif + + return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported); +} + +static bool dml_is_dual_plane(enum dml2_source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) + ret_val = 1; + + return ret_val; +} + +static unsigned int dml_get_plane_idx(const struct 
dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) +{ + unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; + return plane_idx; +} + +static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs) +{ + double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + + wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); + wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + wm_regs->temp_read_or_ppt = 0; + wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); + wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); +} + +static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) +{ + if (a == 0) + return 0; + + return (unsigned int)(math_log2((float)a) - subtrahend); +} + +void dml2_core_shared_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p) +{ + int dst_x_offset = (int)((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) - + (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio); + cursor_dlg_regs->dst_x_offset = (unsigned int)((dst_x_offset > 0) ? dst_x_offset : 0); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); + dml2_printf("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); + dml2_printf("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); + dml2_printf("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); + dml2_printf("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); +#endif + + cursor_dlg_regs->chunk_hdl_adjust = 3; + cursor_dlg_regs->dst_y_offset = 0; + + cursor_dlg_regs->qos_level_fixed = 8; + cursor_dlg_regs->qos_ramp_disable = 0; +} + +static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_display_mode_lib *mode_lib, + unsigned int pipe_idx) +{ + dml2_printf("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); + + unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); + enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format; + enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling; + bool dual_plane = dml_is_dual_plane((enum dml2_source_format_class)(source_format)); + + unsigned int pixel_chunk_bytes = 0; + unsigned int min_pixel_chunk_bytes = 0; + unsigned int dpte_group_bytes = 0; + unsigned int mpte_group_bytes = 0; + + unsigned int p1_pixel_chunk_bytes = 0; + unsigned int p1_min_pixel_chunk_bytes = 0; + unsigned int p1_dpte_group_bytes = 0; + unsigned int p1_mpte_group_bytes = 0; + + pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024); + min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes); + + if (pixel_chunk_bytes == 64 * 1024) + 
min_pixel_chunk_bytes = 0; + + dpte_group_bytes = (unsigned int)(mode_lib->mp.dpte_group_bytes[mode_lib->mp.pipe_plane[pipe_idx]]); + mpte_group_bytes = (unsigned int)(mode_lib->mp.vm_group_bytes[mode_lib->mp.pipe_plane[pipe_idx]]); + + p1_pixel_chunk_bytes = pixel_chunk_bytes; + p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes; + p1_dpte_group_bytes = dpte_group_bytes; + p1_mpte_group_bytes = mpte_group_bytes; + + if (source_format == dml2_rgbe_alpha) + p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024); + + rq_regs->unbounded_request_enabled = mode_lib->mp.UnboundedRequestEnabled; + rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10); + rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10); + + if (min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_l.min_chunk_size = 0; + else + rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1); + + if (p1_min_pixel_chunk_bytes == 0) + rq_regs->rq_regs_c.min_chunk_size = 0; + else + rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1); + + rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6); + rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6); + rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6); + rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6); + + unsigned int detile_buf_size_in_bytes = (unsigned int)(mode_lib->mp.DETBufferSizeInKByte[mode_lib->mp.pipe_plane[pipe_idx]] * 1024); + unsigned int detile_buf_plane1_addr = 0; + + if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) { + unsigned int p0_pte_row_height_linear = (unsigned int)(mode_lib->mp.dpte_row_height_linear[mode_lib->mp.pipe_plane[pipe_idx]]); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); +#endif + DML2_ASSERT(p0_pte_row_height_linear >= 8); + + rq_regs->rq_regs_l.pte_row_height_linear = (unsigned int)(math_floor2(math_log2((float)p0_pte_row_height_linear), 1) - 3); + if (dual_plane) { + unsigned int p1_pte_row_height_linear = (unsigned int)(mode_lib->mp.dpte_row_height_linear_chroma[mode_lib->mp.pipe_plane[pipe_idx]]); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); +#endif + if (sw_mode == dml2_sw_linear) { + DML2_ASSERT(p1_pte_row_height_linear >= 8); + } + + rq_regs->rq_regs_c.pte_row_height_linear = (unsigned int)(math_floor2(math_log2((float)p1_pte_row_height_linear), 1) - 3); + } + } else { + rq_regs->rq_regs_l.pte_row_height_linear = 0; + rq_regs->rq_regs_c.pte_row_height_linear = 0; + } + + rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(mode_lib->mp.SwathHeightY[mode_lib->mp.pipe_plane[pipe_idx]], 0); + rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(mode_lib->mp.SwathHeightC[mode_lib->mp.pipe_plane[pipe_idx]], 0); + + // FIXME_DCN4, programming guide has dGPU condition + if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + + double stored_swath_l_bytes = 
mode_lib->mp.DETBufferSizeY[mode_lib->mp.pipe_plane[pipe_idx]]; + double stored_swath_c_bytes = mode_lib->mp.DETBufferSizeC[mode_lib->mp.pipe_plane[pipe_idx]]; + bool is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx); + + // Note: detile_buf_plane1_addr is in unit of 1KB + if (dual_plane) { + if (is_phantom_pipe) { + detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma + } else { + if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { + detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } else { + detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); +#endif + } + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); + dml2_printf("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); + dml2_printf("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); + dml2_printf("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); + dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); + dml2_printf("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); +#endif + //dml2_printf_rq_regs_st(rq_regs); + dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); +} + +static void rq_dlg_get_dlg_reg( + struct dml2_core_internal_scratch *s, + struct dml2_display_dlg_regs *disp_dlg_regs, + struct dml2_display_ttu_regs *disp_ttu_regs, + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_display_mode_lib *mode_lib, + const unsigned int pipe_idx) +{ + struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals; + + memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals)); + + dml2_printf("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); + + l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); + dml2_assert(l->plane_idx < DML2_MAX_PLANES); + + l->source_format = dml2_444_8; + l->dual_plane = dml_is_dual_plane(l->source_format); + l->odm_mode = dml2_odm_mode_bypass; + + l->htotal = 0; + l->hactive = 0; + l->hblank_end = 0; + l->vblank_end = 0; + l->interlaced = false; + l->pclk_freq_in_mhz = 0.0; + l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? 
(double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + l->ref_freq_to_pix_freq = 0.0; + + if (l->plane_idx < DML2_MAX_PLANES) { + + l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing; + l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format; + l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx]; + + l->htotal = l->timing->h_total; + l->hactive = l->timing->h_active; + l->hblank_end = l->timing->h_blank_end; + l->vblank_end = l->timing->v_blank_end; + l->interlaced = l->timing->interlaced; + l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000; + l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz; + + dml2_printf("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx); + dml2_printf("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); + dml2_printf("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz); + dml2_printf("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); + dml2_printf("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.dchub_refclk_mhz); + dml2_printf("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); + dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + dml2_printf("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); + + DML2_ASSERT(l->refclk_freq_in_mhz != 0); + DML2_ASSERT(l->pclk_freq_in_mhz != 0); + DML2_ASSERT(l->ref_freq_to_pix_freq < 4.0); + + // Need to figure out which side of odm combine we're in + // Assume the pipe instance under the same plane is in order + + if (l->odm_mode == dml2_odm_mode_bypass) { + disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq); + } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) { + // find out how many pipe are in this plane + l->num_active_pipes = mode_lib->mp.num_active_pipes; + l->first_pipe_idx_in_plane = DML2_MAX_PLANES; + l->pipe_idx_in_combine = 0; // pipe index within the plane + l->odm_combine_factor = 2; + + if (l->odm_mode == dml2_odm_mode_combine_3to1) + l->odm_combine_factor = 3; + else if (l->odm_mode == dml2_odm_mode_combine_4to1) + l->odm_combine_factor = 4; + + for (unsigned int i = 0; i < l->num_active_pipes; i++) { + if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) { + if (i < l->first_pipe_idx_in_plane) { + l->first_pipe_idx_in_plane = i; + } + } + } + l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.) 
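+ // In ODM combine each slice scans out hactive/odm_combine_factor pixels, so this pipe's h_blank_end is shifted by its slice position before the refclk conversion below.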
+ + disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq); + dml2_printf("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); + dml2_printf("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane); + dml2_printf("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine); + dml2_printf("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor); + } + dml2_printf("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); + + DML2_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13)); + + disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19)); + disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8)); + disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits + + l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]]; + l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]); + + dml2_printf("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank); + dml2_printf("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start); + dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + + l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]); + disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0; + + dml2_printf("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); + + l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); + l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); + + dml2_printf("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler); + dml2_printf("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler); + + l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]]; + + l->max_dst_y_per_vm_vblank = 32.0; //U5.2 + l->max_dst_y_per_row_vblank = 16.0; //U4.2 + + // magic! 
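+ // For very short lines (htotal <= 75) the U5.2/U4.2 budgets above are too tight, so relax the caps used by the range asserts further down.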
+ if (l->htotal <= 75) { + l->max_dst_y_per_vm_vblank = 100.0; + l->max_dst_y_per_row_vblank = 100.0; + } + + dml2_printf("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch); + dml2_printf("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip); + dml2_printf("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip); + dml2_printf("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank); + dml2_printf("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank); + + DML2_ASSERT(l->dst_y_per_vm_vblank < l->max_dst_y_per_vm_vblank); + DML2_ASSERT(l->dst_y_per_row_vblank < l->max_dst_y_per_row_vblank); + if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) { + DML2_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); + } + + l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]]; + l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]]; + + dml2_printf("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); + dml2_printf("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); + + // Active + l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); + dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l); + + l->refcyc_per_line_delivery_pre_c = 0.0; + l->refcyc_per_line_delivery_c = 0.0; + + if (l->dual_plane) { + l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c); + dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); + } + + disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + + l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); + dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); + + l->refcyc_per_req_delivery_pre_c = 0.0; + l->refcyc_per_req_delivery_c = 0.0; + if (l->dual_plane) { + l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_req_delivery_c = 
mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); + dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); + } + + // TTU - Cursor + DML2_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); + + // Assign to register structures + disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2)); + DML2_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18)); + + disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line + disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk + disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19)); + + dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); + dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); + dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); + dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + + disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); + disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); + disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10)); + + l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; + l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * 
l->refclk_freq_in_mhz; + l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2)); + + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l); + disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c); + disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l); + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c); + disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l); + disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c); + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1); + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1); + disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1); + + l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]]; + l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]]; + l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; + + disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2)); + disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2)); + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l); + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l); + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c); + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l); + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c); + + disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group); + disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned 
int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10)); + disp_ttu_regs->qos_level_low_wm = 0; + + disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq); + + disp_ttu_regs->qos_level_flip = 14; + disp_ttu_regs->qos_level_fixed_l = 8; + disp_ttu_regs->qos_level_fixed_c = 8; + disp_ttu_regs->qos_ramp_disable_l = 0; + disp_ttu_regs->qos_ramp_disable_c = 0; + disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz); + + // CHECK for HW registers' range, DML2_ASSERT or clamp + DML2_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); + DML2_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); + DML2_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); + DML2_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); + if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1); + + if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1); + + + DML2_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); + DML2_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); + + if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) { + dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); + l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1; + } + if (l->dual_plane) { + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) { + dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); + l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1; + } + } + + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1); + if (l->dual_plane) { + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1); + } + DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); + if (l->dual_plane) { + DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); + } + + DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); + DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); + DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); + 
DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); + DML2_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); + DML2_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); + DML2_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); + + dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); + + } +} + +static void rq_dlg_get_arb_params(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param) +{ + arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs; + arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max + arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4; + arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 96 : arb_param->sdpif_request_rate_limit; + arb_param->sat_level_us = 60; + arb_param->hvm_max_qos_commit_threshold = 0xf; + arb_param->hvm_min_req_outstand_commit_threshold = 0xa; + arb_param->compbuf_reserved_space_kbytes = mode_lib->mp.compbuf_reserved_space_64b * 64 / 1024; + arb_param->allow_sdpif_rate_limit_when_cstate_req = mode_lib->mp.hw_debug5; + arb_param->dcfclk_deep_sleep_hysteresis = mode_lib->mp.dcfclk_deep_sleep_hysteresis; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); + dml2_printf("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit); + dml2_printf("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes); + dml2_printf("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req); + dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis); +#endif + +} + +void dml2_core_shared_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out) +{ + rq_dlg_get_wm_regs(display_cfg, mode_lib, out); +} + +void dml2_core_shared_get_arb_params(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out) +{ + rq_dlg_get_arb_params(mode_lib, out); +} + +void dml2_core_shared_get_pipe_regs(const struct dml2_display_cfg *display_cfg, + struct dml2_core_internal_display_mode_lib *mode_lib, + struct dml2_dchub_per_pipe_register_set *out, int pipe_index) +{ + rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index); + rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index); + out->det_size = mode_lib->mp.DETBufferSizeInKByte[mode_lib->mp.pipe_plane[pipe_index]] / mode_lib->ip.config_return_buffer_segment_size_in_kbytes; +} + +void dml2_core_shared_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) +{ + // out->min_clocks.dcn4.dscclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); // FIXME_STAGE2 + // out->min_clocks.dcn4.dtbclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + // out->min_clocks.dcn4.phyclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + + out->global_sync.dcn4.vready_offset_pixels = 
mode_lib->mp.VReadyOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4.vstartup_lines = mode_lib->mp.VStartup[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4.vupdate_offset_pixels = mode_lib->mp.VUpdateOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4.vupdate_vupdate_width_pixels = mode_lib->mp.VUpdateWidthPix[mode_lib->mp.pipe_plane[pipe_index]]; +} + +void dml2_core_shared_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx) +{ + unsigned int n; + + out->num_mcaches_plane0 = mode_lib->ms.num_mcaches_l[plane_idx]; + out->num_mcaches_plane1 = mode_lib->ms.num_mcaches_c[plane_idx]; + out->shift_granularity.p0 = mode_lib->ms.mcache_shift_granularity_l[plane_idx]; + out->shift_granularity.p1 = mode_lib->ms.mcache_shift_granularity_c[plane_idx]; + + for (n = 0; n < out->num_mcaches_plane0; n++) + out->mcache_x_offsets_plane0[n] = mode_lib->ms.mcache_offsets_l[plane_idx][n]; + + for (n = 0; n < out->num_mcaches_plane1; n++) + out->mcache_x_offsets_plane1[n] = mode_lib->ms.mcache_offsets_l[plane_idx][n]; + + out->last_slice_sharing.mall_comb_mcache_p0 = mode_lib->ms.mall_comb_mcache_l[plane_idx]; + out->last_slice_sharing.mall_comb_mcache_p1 = mode_lib->ms.mall_comb_mcache_c[plane_idx]; + out->last_slice_sharing.plane0_plane1 = mode_lib->ms.lc_comb_mcache[plane_idx]; + out->informative.meta_row_bytes_plane0 = mode_lib->ms.mcache_row_bytes_l[plane_idx]; + out->informative.meta_row_bytes_plane1 = mode_lib->ms.mcache_row_bytes_c[plane_idx]; + + out->valid = true; +} + +void dml2_core_shared_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index) +{ + *out = mode_lib->mp.SurfaceSizeInTheMALL[mode_lib->mp.pipe_plane[pipe_index]]; +} + +void dml2_core_shared_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx) +{ + out->mall_svp_size_requirement_ways = 0; + + out->nominal_vblank_pstate_latency_hiding_us = + (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]); + + out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx]; + + out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx]; +} + +void dml2_core_shared_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index) +{ + double phantom_processing_delay_pix; + unsigned int phantom_processing_delay_lines; + unsigned int phantom_v_active_lines; + unsigned int phantom_v_startup_lines; + unsigned int phantom_v_blank_lines; + unsigned int main_v_blank_lines; + unsigned int rem; + + phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) * + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000)); + phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / 
(double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total); + dml2_core_shared_div_rem(phantom_processing_delay_pix, display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total, &rem); + if (rem) + phantom_processing_delay_lines++; + + phantom_v_startup_lines = mode_lib->ms.MaxVStartupLines[plane_index]; + phantom_v_active_lines = phantom_processing_delay_lines + mode_lib->ms.SubViewportLinesNeededInMALL[plane_index] + mode_lib->ip.subvp_swath_height_margin_lines; + + // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank) + phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1; + main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active; + if (phantom_v_blank_lines > main_v_blank_lines) + phantom_v_blank_lines = main_v_blank_lines; + + out->phantom_v_active = phantom_v_active_lines; + // phantom_vtotal = vactive + vblank + out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines; + + out->phantom_min_v_active = mode_lib->ms.SubViewportLinesNeededInMALL[plane_index]; + out->phantom_v_startup = mode_lib->ms.MaxVStartupLines[plane_index]; + + out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us); + dml2_printf("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us); + dml2_printf("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines); + dml2_printf("DML::%s: vblank_reserved_time_us = %f\n", __func__, out->vblank_reserved_time_us); +#endif +} + +void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out) +{ + unsigned int k, n; + + out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport; + out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport; + out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport; + out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport; + out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport; + out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420; + out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP; + out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported; + out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion; + out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated; + out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated; + out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP; + 
out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink; + out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO; + out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport; + out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport; + out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support; + out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport; + out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport; + out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport; + out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface; + out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false; + out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe; + out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen; + out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState; + out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize; + out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits; + + out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots; + out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits; + out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices; + out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport; + out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported; + out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported; + out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport; + + out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport; + out->informative.mode_support_info.ROBUrgencyAvoidance = mode_lib->ms.support.ROBUrgencyAvoidance; + out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport; + out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance; + out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded; + out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded; + + out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport; + out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport; + out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport; + + 
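+ // Prefetch, clock and pipe-allocation related capability flags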
out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported; + out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported; + out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported; + out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support; + out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport; + out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport; + + for (k = 0; k < out->display_config.num_planes; k++) { + + out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k]; + out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k]; + out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k]; + out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k]; + out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k]; + out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k]; + out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k]; + out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k]; + + if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi; + else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl) + out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl; + + if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5; + else if (mode_lib->ms.support.OutputRate[k] == 
dml2_core_internal_output_rate_dp_rate_uhbr20) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4; + + out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k]; + out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k]; + } + + out->informative.watermarks.urgent_us = mode_lib->mp.Watermark.UrgentWatermark; + out->informative.watermarks.writeback_urgent_us = mode_lib->mp.Watermark.WritebackUrgentWatermark; + out->informative.watermarks.writeback_pstate_us = mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark; + out->informative.watermarks.writeback_fclk_pstate_us = mode_lib->mp.Watermark.WritebackFCLKChangeWatermark; + + out->informative.watermarks.cstate_exit_us = mode_lib->mp.Watermark.StutterExitWatermark; + out->informative.watermarks.cstate_enter_plus_exit_us = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark; + out->informative.watermarks.z8_cstate_exit_us = mode_lib->mp.Watermark.Z8StutterExitWatermark; + out->informative.watermarks.z8_cstate_enter_plus_exit_us = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark; + out->informative.watermarks.pstate_change_us = mode_lib->mp.Watermark.DRAMClockChangeWatermark; + out->informative.watermarks.fclk_pstate_change_us = mode_lib->mp.Watermark.FCLKChangeWatermark; + out->informative.watermarks.usr_retraining_us = mode_lib->mp.Watermark.USRRetrainingWatermark; + + out->informative.mall.total_surface_size_in_mall_bytes = 0; + for (k = 0; k < out->display_config.num_planes; ++k) + out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k]; + + out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk; + out->informative.qos.urgent_latency_us = mode_lib->mp.UrgentLatency; + + out->informative.qos.max_non_urgent_latency_us = mode_lib->ms.support.max_non_urgent_latency_us; + out->informative.qos.max_urgent_latency_us = mode_lib->ms.support.max_urgent_latency_us; + out->informative.qos.avg_non_urgent_latency_us = mode_lib->ms.support.avg_non_urgent_latency_us; + out->informative.qos.avg_urgent_latency_us = mode_lib->ms.support.avg_urgent_latency_us; + + out->informative.qos.wm_memory_trip_us = mode_lib->mp.UrgentLatency; + out->informative.qos.meta_trip_memory_us = mode_lib->mp.MetaTripToMemory; + out->informative.qos.fraction_of_urgent_bandwidth = mode_lib->mp.FractionOfUrgentBandwidth; + 
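The OutputType and OutputRate translation inside the per-plane loop above is a one-to-one mapping from the dml2_core_internal_* enumerators onto the public dml2_* ones. A compact sketch of the same idea written as a switch; the enums below are abbreviated stand-ins, not the real types from this patch:

enum internal_rate { INT_RATE_UNKNOWN, INT_RATE_HBR, INT_RATE_HBR2, INT_RATE_HBR3, INT_RATE_UHBR10 };
enum public_rate   { PUB_RATE_UNKNOWN, PUB_RATE_HBR, PUB_RATE_HBR2, PUB_RATE_HBR3, PUB_RATE_UHBR10 };

static enum public_rate translate_rate(enum internal_rate rate)
{
	switch (rate) {
	case INT_RATE_HBR:    return PUB_RATE_HBR;
	case INT_RATE_HBR2:   return PUB_RATE_HBR2;
	case INT_RATE_HBR3:   return PUB_RATE_HBR3;
	case INT_RATE_UHBR10: return PUB_RATE_UHBR10;
	default:              return PUB_RATE_UNKNOWN;
	}
}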
out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip; + out->informative.qos.fraction_of_urgent_bandwidth_mall = mode_lib->mp.FractionOfUrgentBandwidthMALL; + + out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = + mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = + mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = + mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = + mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + + out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = + mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = + mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = + mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = + mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + + out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = + mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = + mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = + mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]; + + out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = + mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = + mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = + mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]; + + out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + + out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = 
mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + + out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + + out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]; + out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]; + out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]; + + out->informative.crb.comp_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte; + out->informative.crb.UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled; + + out->informative.crb.compbuf_reserved_space_64b = mode_lib->mp.compbuf_reserved_space_64b; + out->informative.misc.hw_debug5 = mode_lib->mp.hw_debug5; + out->informative.misc.dcfclk_deep_sleep_hysteresis = mode_lib->mp.dcfclk_deep_sleep_hysteresis; + + out->informative.power_management.stutter_efficiency = mode_lib->mp.StutterEfficiencyNotIncludingVBlank; + out->informative.power_management.stutter_efficiency_with_vblank = mode_lib->mp.StutterEfficiency; + out->informative.power_management.stutter_num_bursts = mode_lib->mp.NumberOfStutterBurstsPerFrame; + + out->informative.power_management.z8.stutter_efficiency = mode_lib->mp.Z8StutterEfficiency; + out->informative.power_management.z8.stutter_efficiency_with_vblank = mode_lib->mp.StutterEfficiency; + out->informative.power_management.z8.stutter_num_bursts = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame; + out->informative.power_management.z8.stutter_period = mode_lib->mp.StutterPeriod; + + out->informative.power_management.z8.bestcase.stutter_efficiency = mode_lib->mp.Z8StutterEfficiencyBestCase; + out->informative.power_management.z8.bestcase.stutter_num_bursts = mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase; 
+ out->informative.power_management.z8.bestcase.stutter_period = mode_lib->mp.StutterPeriodBestCase; + + out->informative.misc.cstate_max_cap_mode = mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; + + out->min_clocks.dcn4.dpprefclk_khz = (int unsigned)(mode_lib->mp.GlobalDPPCLK * 1000.0); + + out->informative.qos.max_active_fclk_change_latency_supported = mode_lib->mp.MaxActiveFCLKChangeLatencySupported; + + for (k = 0; k < out->display_config.num_planes; k++) { + + if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) + out->informative.misc.PrefetchMode[k] = 0; + else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us) + && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)) + out->informative.misc.PrefetchMode[k] = 1; + else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us) + out->informative.misc.PrefetchMode[k] = 2; + else + out->informative.misc.PrefetchMode[k] = 3; + + out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k]; + out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k]; + out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k]; + out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k]; + out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k]; + out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k]; + out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k]; + out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k]; + out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k]; + out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k]; + out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k]; + out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k]; + out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k]; + out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k]; + out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k]; + out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k]; + out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k]; + out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k]; + out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k]; + out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k]; + 
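The PrefetchMode ladder near the top of the per-plane loop above selects the most aggressive mode whose blackout and latency requirements still fit inside the reserved vblank time; note the left-hand side is in nanoseconds, so each microsecond threshold is scaled by 1000. The same decision expressed as a standalone helper (a sketch, not code from the patch):

/* mode 0: vblank hides DRAM p-state, FCLK p-state and stutter enter/exit
 * mode 1: FCLK p-state and stutter only
 * mode 2: stutter only
 * mode 3: none of the above fit
 */
static int prefetch_mode(double reserved_vblank_ns, double dram_blackout_us,
			 double fclk_blackout_us, double stutter_us)
{
	if (reserved_vblank_ns >= 1000.0 * dram_blackout_us &&
	    reserved_vblank_ns >= 1000.0 * fclk_blackout_us &&
	    reserved_vblank_ns >= 1000.0 * stutter_us)
		return 0;
	if (reserved_vblank_ns >= 1000.0 * fclk_blackout_us &&
	    reserved_vblank_ns >= 1000.0 * stutter_us)
		return 1;
	if (reserved_vblank_ns >= 1000.0 * stutter_us)
		return 2;
	return 3;
}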
out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]; + out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k]; + out->informative.plane_info[k].swath_width_plane0 = mode_lib->mp.SwathWidthY[k]; + out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k]; + out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k]; + out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k]; + out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k]; + out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k]; + out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k]; + out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k]; + out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k]; + out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k]; + out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k]; + out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k]; + out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k]; + out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k]; + out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k]; + out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k]; + out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k]; + out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k]; + out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k]; + + out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k]; + out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k]; + out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k]; + out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k]; + out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k]; + out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k]; + out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k]; + out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k]; + out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k]; + out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k]; + out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k]; + out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k]; + out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k]; + out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k]; + 
out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k]; + out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k]; + + out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k]; + out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k]; + out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k]; + out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k]; + out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k]; + out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k]; + out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k]; + } + + // For this DV informative layer, all pipes in the same planes will just use the same id + // will have the optimization and helper layer later on + // only work when we can have high "mcache" that fit everything without thrashing the cache + for (k = 0; k < out->display_config.num_planes; k++) { + out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = mode_lib->ms.num_mcaches_l[k]; + out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = mode_lib->ms.mcache_row_bytes_l[k]; + + for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) { + out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = mode_lib->ms.mcache_offsets_l[k][n]; + out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k; + } + + out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = mode_lib->ms.num_mcaches_c[k]; + out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = mode_lib->ms.mcache_row_bytes_c[k]; + + for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) { + out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = mode_lib->ms.mcache_offsets_c[k][n]; + out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k; + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h new file mode 100644 index 000000000000..d76bda907ec8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ + +#ifndef __DML2_CORE_SHARED_H__ +#define __DML2_CORE_SHARED_H__ + +#define __DML_VBA_DEBUG__ +#define __DML2_CALCS_MAX_VRATIO_PRE_OTO__ 4.0 //<brief Prefetch schedule max vratio for one to one scheduling calculation for prefetch +#define __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ 6.0 //<brief Prefetch schedule max vratio when enhance prefetch schedule acceleration is enabled and vstartup is earliest possible already +#define __DML2_CALCS_DPP_INVALID__ 0 +#define __DML2_CALCS_DCFCLK_FACTOR__ 1.15 //<brief fudge factor for min dcfclk calclation +#define __DML2_CALCS_PIPE_NO_PLANE__ 99 + +#include "dml2_core_shared_types.h" +#include "dml2_internal_shared_types.h" + +double dml2_core_shared_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); + +const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); +const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); +bool dml2_core_shared_is_420(enum dml2_source_format_class source_format); + +bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params); +bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params); +void dml2_core_shared_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out); +void dml2_core_shared_get_arb_params(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out); +void dml2_core_shared_get_pipe_regs(const struct dml2_display_cfg *display_cfg, struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_per_pipe_register_set *out, int pipe_index); +void dml2_core_shared_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index); +void dml2_core_shared_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx); +void dml2_core_shared_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); +void dml2_core_shared_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx); +void dml2_core_shared_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); +void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out); +void dml2_core_shared_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h new file mode 100644 index 000000000000..baded2315254 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -0,0 +1,1948 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
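dml2_core_shared_div_rem(), declared in dml2_core_shared.h above, presumably returns the quotient (its return type is double) and writes the integer remainder through the pointer, which lets callers round a pixel count up to whole lines as the phantom processing-delay conversion does with its "if (rem)" increment. The implementation below is only a plausible stand-in, not the body from the patch:

#include <stdio.h>

/* hypothetical stand-in with the same shape as the declared helper */
static double div_rem_sketch(double dividend, unsigned int divisor, unsigned int *remainder)
{
	*remainder = (unsigned int)((unsigned long long)dividend % divisor);
	return dividend / divisor;
}

int main(void)
{
	unsigned int h_total = 2200, rem;
	double pixels = 7130.0;                  /* e.g. a processing delay in pixels */
	unsigned int lines = (unsigned int)(pixels / h_total);

	div_rem_sketch(pixels, h_total, &rem);
	if (rem)                                 /* round up to a whole line */
		lines++;

	printf("%u line(s)\n", lines);           /* 7130 / 2200 gives 4 after rounding up */
	return 0;
}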
+ + +#ifndef __DML2_CORE_SHARED_TYPES_H__ +#define __DML2_CORE_SHARED_TYPES_H__ + +#include "dml2_external_lib_deps.h" +#include "dml_top_display_cfg_types.h" +#include "dml_top_types.h" + +struct dml2_core_ip_params { + unsigned int vblank_nom_default_us; + unsigned int remote_iommu_outstanding_translations; + unsigned int rob_buffer_size_kbytes; + unsigned int config_return_buffer_size_in_kbytes; + unsigned int config_return_buffer_segment_size_in_kbytes; + unsigned int compressed_buffer_segment_size_in_kbytes; + unsigned int meta_fifo_size_in_kentries; + unsigned int dpte_buffer_size_in_pte_reqs_luma; + unsigned int dpte_buffer_size_in_pte_reqs_chroma; + unsigned int pixel_chunk_size_kbytes; + unsigned int alpha_pixel_chunk_size_kbytes; + unsigned int min_pixel_chunk_size_bytes; + unsigned int writeback_chunk_size_kbytes; + unsigned int line_buffer_size_bits; + unsigned int max_line_buffer_lines; + unsigned int writeback_interface_buffer_size_kbytes; + unsigned int max_num_dpp; + unsigned int max_num_otg; + unsigned int max_num_wb; + unsigned int max_dchub_pscl_bw_pix_per_clk; + unsigned int max_pscl_lb_bw_pix_per_clk; + unsigned int max_lb_vscl_bw_pix_per_clk; + unsigned int max_vscl_hscl_bw_pix_per_clk; + double max_hscl_ratio; + double max_vscl_ratio; + unsigned int max_hscl_taps; + unsigned int max_vscl_taps; + unsigned int num_dsc; + unsigned int maximum_dsc_bits_per_component; + unsigned int maximum_pixels_per_line_per_dsc_unit; + bool dsc422_native_support; + bool cursor_64bpp_support; + double dispclk_ramp_margin_percent; + unsigned int dppclk_delay_subtotal; + unsigned int dppclk_delay_scl; + unsigned int dppclk_delay_scl_lb_only; + unsigned int dppclk_delay_cnvc_formatter; + unsigned int dppclk_delay_cnvc_cursor; + unsigned int cursor_buffer_size; + unsigned int cursor_chunk_size; + unsigned int dispclk_delay_subtotal; + bool dynamic_metadata_vm_enabled; + unsigned int max_inter_dcn_tile_repeaters; + unsigned int max_num_hdmi_frl_outputs; + unsigned int max_num_dp2p0_outputs; + unsigned int max_num_dp2p0_streams; + bool dcc_supported; + bool ptoi_supported; + double writeback_max_hscl_ratio; + double writeback_max_vscl_ratio; + double writeback_min_hscl_ratio; + double writeback_min_vscl_ratio; + unsigned int writeback_max_hscl_taps; + unsigned int writeback_max_vscl_taps; + unsigned int writeback_line_buffer_buffer_size; + + unsigned int words_per_channel; + bool imall_supported; + unsigned int max_flip_time_us; + unsigned int subvp_swath_height_margin_lines; + unsigned int subvp_fw_processing_delay_us; + unsigned int subvp_pstate_allow_width_us; + double max_vactive_det_fill_delay_us; + + // MRQ + bool dcn_mrq_present; + unsigned int zero_size_buffer_entries; + unsigned int compbuf_reserved_space_zs; + unsigned int dcc_meta_buffer_size_bytes; + unsigned int meta_chunk_size_kbytes; + unsigned int min_meta_chunk_size_bytes; + + unsigned int dchub_arb_to_ret_delay; // num of dcfclk + unsigned int hostvm_mode; +}; + +struct dml2_core_internal_DmlPipe { + double Dppclk; + double Dispclk; + double PixelClock; + double DCFClkDeepSleep; + unsigned int DPPPerSurface; + bool ScalerEnabled; + enum dml2_rotation_angle RotationAngle; + bool mirrored; + unsigned int ViewportHeight; + unsigned int ViewportHeightC; + unsigned int BlockWidth256BytesY; + unsigned int BlockHeight256BytesY; + unsigned int BlockWidth256BytesC; + unsigned int BlockHeight256BytesC; + unsigned int BlockWidthY; + unsigned int BlockHeightY; + unsigned int BlockWidthC; + unsigned int BlockHeightC; + unsigned int 
InterlaceEnable; + unsigned int NumberOfCursors; + unsigned int VBlank; + unsigned int HTotal; + unsigned int HActive; + bool DCCEnable; + enum dml2_odm_mode ODMMode; + enum dml2_source_format_class SourcePixelFormat; + enum dml2_swizzle_mode SurfaceTiling; + unsigned int BytePerPixelY; + unsigned int BytePerPixelC; + bool ProgressiveToInterlaceUnitInOPP; + double VRatio; + double VRatioChroma; + unsigned int VTaps; + unsigned int VTapsChroma; + unsigned int PitchY; + unsigned int PitchC; + bool ViewportStationary; + unsigned int ViewportXStart; + unsigned int ViewportYStart; + unsigned int ViewportXStartC; + unsigned int ViewportYStartC; + bool FORCE_ONE_ROW_FOR_FRAME; + unsigned int SwathHeightY; + unsigned int SwathHeightC; + + unsigned int DCCMetaPitchY; + unsigned int DCCMetaPitchC; +}; + +enum dml2_core_internal_request_type { + dml2_core_internal_request_type_256_bytes = 0, + dml2_core_internal_request_type_128_bytes_non_contiguous = 1, + dml2_core_internal_request_type_128_bytes_contiguous = 2, + dml2_core_internal_request_type_na = 3 +}; +enum dml2_core_internal_bw_type { + dml2_core_internal_bw_sdp = 0, + dml2_core_internal_bw_dram = 1, + dml2_core_internal_bw_max +}; + +enum dml2_core_internal_soc_state_type { + dml2_core_internal_soc_state_sys_active = 0, + dml2_core_internal_soc_state_svp_prefetch = 1, + dml2_core_internal_soc_state_sys_idle = 2, + dml2_core_internal_soc_state_max +}; + +enum dml2_core_internal_output_type { + dml2_core_internal_output_type_unknown = 0, + dml2_core_internal_output_type_dp = 1, + dml2_core_internal_output_type_edp = 2, + dml2_core_internal_output_type_dp2p0 = 3, + dml2_core_internal_output_type_hdmi = 4, + dml2_core_internal_output_type_hdmifrl = 5 +}; + +enum dml2_core_internal_output_type_rate { + dml2_core_internal_output_rate_unknown = 0, + dml2_core_internal_output_rate_dp_rate_hbr = 1, + dml2_core_internal_output_rate_dp_rate_hbr2 = 2, + dml2_core_internal_output_rate_dp_rate_hbr3 = 3, + dml2_core_internal_output_rate_dp_rate_uhbr10 = 4, + dml2_core_internal_output_rate_dp_rate_uhbr13p5 = 5, + dml2_core_internal_output_rate_dp_rate_uhbr20 = 6, + dml2_core_internal_output_rate_hdmi_rate_3x3 = 7, + dml2_core_internal_output_rate_hdmi_rate_6x3 = 8, + dml2_core_internal_output_rate_hdmi_rate_6x4 = 9, + dml2_core_internal_output_rate_hdmi_rate_8x4 = 10, + dml2_core_internal_output_rate_hdmi_rate_10x4 = 11, + dml2_core_internal_output_rate_hdmi_rate_12x4 = 12 +}; + +struct dml2_core_internal_watermarks { + double UrgentWatermark; + double WritebackUrgentWatermark; + double DRAMClockChangeWatermark; + double FCLKChangeWatermark; + double WritebackDRAMClockChangeWatermark; + double WritebackFCLKChangeWatermark; + double StutterExitWatermark; + double StutterEnterPlusExitWatermark; + double Z8StutterExitWatermark; + double Z8StutterEnterPlusExitWatermark; + double USRRetrainingWatermark; + double g6_temp_read_watermark_us; +}; + +struct dml2_core_internal_mode_support_info { + //----------------- + // Mode Support Information + //----------------- + bool ImmediateFlipSupport; //<brief Means mode support immediate flip at the max combine setting; determine in mode support and used in mode programming + + // Mode Support Reason/ + bool WritebackLatencySupport; + bool ScaleRatioAndTapsSupport; + bool SourceFormatPixelAndScanSupport; + bool P2IWith420; + bool DSCOnlyIfNecessaryWithBPP; + bool DSC422NativeNotSupported; + bool LinkRateDoesNotMatchDPVersion; + bool LinkRateForMultistreamNotIndicated; + bool BPPForMultistreamNotIndicated; + bool 
MultistreamWithHDMIOreDP; + bool MSOOrODMSplitWithNonDPLink; + bool NotEnoughLanesForMSO; + bool NumberOfOTGSupport; + bool NumberOfHDMIFRLSupport; + bool NumberOfDP2p0Support; + bool WritebackScaleRatioAndTapsSupport; + bool CursorSupport; + bool PitchSupport; + bool ViewportExceedsSurface; + //bool ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified; + bool ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe; + bool InvalidCombinationOfMALLUseForPStateAndStaticScreen; + bool InvalidCombinationOfMALLUseForPState; + bool ExceededMALLSize; + bool EnoughWritebackUnits; + + bool ExceededMultistreamSlots; + bool NotEnoughDSCUnits; + bool NotEnoughDSCSlices; + bool PixelsPerLinePerDSCUnitSupport; + bool DSCCLKRequiredMoreThanSupported; + bool DTBCLKRequiredMoreThanSupported; + bool LinkCapacitySupport; + + bool ROBSupport; + bool ROBUrgencyAvoidance; + bool OutstandingRequestsSupport; + bool OutstandingRequestsUrgencyAvoidance; + + bool PTEBufferSizeNotExceeded; + bool DCCMetaBufferSizeNotExceeded; + enum dml2_dram_clock_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; + enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + bool global_dram_clock_change_supported; + bool global_fclk_change_supported; + bool USRRetrainingSupport; + bool AvgBandwidthSupport; + bool UrgVactiveBandwidthSupport; + bool EnoughUrgentLatencyHidingSupport; + bool PrefetchSupported; + bool PrefetchBandwidthSupported; + bool DynamicMetadataSupported; + bool VRatioInPrefetchSupported; + bool DISPCLK_DPPCLK_Support; + bool TotalAvailablePipesSupport; + bool ModeSupport; + bool ViewportSizeSupport; + + bool MPCCombineEnable[DML2_MAX_PLANES]; /// <brief Indicate if the MPC Combine enable in the given state and optimize mpc combine setting + enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; /// <brief ODM mode that is chosen in the mode check stage and will be used in mode programming stage + unsigned int DPPPerSurface[DML2_MAX_PLANES]; /// <brief How many DPPs are needed drive the surface to output. If MPCC or ODMC could be 2 or 4. 
+ bool DSCEnabled[DML2_MAX_PLANES]; /// <brief Indicate if the DSC is actually required; used in mode_programming + bool FECEnabled[DML2_MAX_PLANES]; /// <brief Indicate if the FEC is actually required + unsigned int NumberOfDSCSlices[DML2_MAX_PLANES]; /// <brief Indicate how many slices needed to support the given mode + + double OutputBpp[DML2_MAX_PLANES]; + enum dml2_core_internal_output_type OutputType[DML2_MAX_PLANES]; + enum dml2_core_internal_output_type_rate OutputRate[DML2_MAX_PLANES]; + + unsigned int AlignedYPitch[DML2_MAX_PLANES]; + unsigned int AlignedCPitch[DML2_MAX_PLANES]; + + unsigned int AlignedDCCMetaPitchY[DML2_MAX_PLANES]; + unsigned int AlignedDCCMetaPitchC[DML2_MAX_PLANES]; + + unsigned int request_size_bytes_luma[DML2_MAX_PLANES]; + unsigned int request_size_bytes_chroma[DML2_MAX_PLANES]; + enum dml2_core_internal_request_type RequestLuma[DML2_MAX_PLANES]; + enum dml2_core_internal_request_type RequestChroma[DML2_MAX_PLANES]; + + unsigned int DCCYMaxUncompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCYMaxCompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCYIndependentBlock[DML2_MAX_PLANES]; + unsigned int DCCCMaxUncompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCCMaxCompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCCIndependentBlock[DML2_MAX_PLANES]; + + double avg_bandwidth_available_min[dml2_core_internal_soc_state_max]; + double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_max]; // min between SDP and DRAM, for latency evaluation + double urg_bandwidth_available_min[dml2_core_internal_soc_state_max]; // min between SDP and DRAM + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_vm_only bw, sdp has no different derate for vm/non-vm etc. + double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_pixel_and_vm bw, sdp has no different derate for vm/non-vm etc. 
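The bandwidth tables in this block, like their counterparts in dml2_core_internal_mode_program further down and the qos fields filled in by dml2_core_shared_get_informative(), are indexed as [soc_state][bw_type] using the dml2_core_internal_soc_state_* and dml2_core_internal_bw_* enums defined earlier in this header, and the *_min variants hold the smaller of the SDP and DRAM entries, as the comments note. A stand-in sketch of that relationship (simplified enums, not the real types):

/* stand-in enums mirroring the indexing scheme used by the real tables */
enum { STATE_SYS_ACTIVE, STATE_SVP_PREFETCH, STATE_SYS_IDLE, STATE_MAX };
enum { BW_SDP, BW_DRAM, BW_MAX };

static double min_over_bw_types(const double table[STATE_MAX][BW_MAX], int state)
{
	double sdp = table[state][BW_SDP];
	double dram = table[state][BW_DRAM];

	return sdp < dram ? sdp : dram;  /* e.g. what urg_bandwidth_available_min[state] holds */
}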
+ + double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // active bandwidth, scaled by urg burst factor + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth, scaled by urg burst factor + double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth + flip + + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // same as urg_bandwidth, except not scaled by urg burst factor + double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + + bool avg_bandwidth_support_ok[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + + double max_non_urgent_latency_us; + double max_urgent_latency_us; + double avg_non_urgent_latency_us; + double avg_urgent_latency_us; + + bool incorrect_imall_usage; + + bool g6_temp_read_support; + + struct dml2_core_internal_watermarks watermarks; +}; + +struct dml2_core_internal_mode_support { + // Physical info; only using for programming + unsigned int state_idx; // <brief min clk state table index for mode support call + unsigned int qos_param_index; // to access the uclk dependent qos_parameters table + unsigned int active_min_uclk_dpm_index; // to access the min_clk table + unsigned int num_active_planes; // <brief As determined by either e2e_pipe_param or display_cfg + + // Calculated Clocks + double RequiredDISPCLK; /// <brief Required DISPCLK; depends on pixel rate; odm mode etc. + double RequiredDPPCLK[DML2_MAX_PLANES]; + double RequiredDISPCLKPerSurface[DML2_MAX_PLANES]; + double RequiredDTBCLK[DML2_MAX_PLANES]; + + double required_dscclk_freq_mhz[DML2_MAX_PLANES]; + + double FabricClock; /// <brief Basically just the clock freq at the min (or given) state + double SOCCLK; /// <brief Basically just the clock freq at the min (or given) state + double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting + double GlobalDPPCLK; /// <brief the Max DPPCLK freq out of all pipes + double uclk_freq_mhz; + double dram_bw_mbps; + double max_dram_bw_mbps; + + double MaxFabricClock; /// <brief Basically just the clock freq at the min (or given) state + double MaxDCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting + double max_dispclk_freq_mhz; + double max_dppclk_freq_mhz; + double max_dscclk_freq_mhz; + + bool NoTimeForPrefetch[DML2_MAX_PLANES]; + bool NoTimeForDynamicMetadata[DML2_MAX_PLANES]; + + // ---------------------------------- + // Mode Support Info and fail reason + // ---------------------------------- + struct dml2_core_internal_mode_support_info support; + + // These are calculated before the ModeSupport and ModeProgram step + // They represent the bound for the return buffer sizing + unsigned int MaxTotalDETInKByte; + unsigned int NomDETInKByte; + unsigned int MinCompressedBufferSizeInKByte; + + // Info obtained at the end of mode support calculations + // The reported info is at the "optimal" state and combine setting + unsigned int DETBufferSizeInKByte[DML2_MAX_PLANES]; // <brief Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value. 
+ unsigned int DETBufferSizeY[DML2_MAX_PLANES]; + unsigned int DETBufferSizeC[DML2_MAX_PLANES]; + unsigned int SwathHeightY[DML2_MAX_PLANES]; + unsigned int SwathHeightC[DML2_MAX_PLANES]; + unsigned int SwathWidthY[DML2_MAX_PLANES]; + unsigned int SwathWidthC[DML2_MAX_PLANES]; + + // ---------------------------------- + // Intermediates/Informational + // ---------------------------------- + unsigned int TotImmediateFlipBytes; + bool DCCEnabledInAnySurface; + double WritebackRequiredDISPCLK; + double TimeCalc; + double TWait[DML2_MAX_PLANES]; + + bool UnboundedRequestEnabled; + unsigned int CompressedBufferSizeInkByte; + double VRatioPreY[DML2_MAX_PLANES]; + double VRatioPreC[DML2_MAX_PLANES]; + unsigned int swath_width_luma_ub[DML2_MAX_PLANES]; + unsigned int swath_width_chroma_ub[DML2_MAX_PLANES]; + unsigned int RequiredSlots[DML2_MAX_PLANES]; + unsigned int vm_bytes[DML2_MAX_PLANES]; + unsigned int DPTEBytesPerRow[DML2_MAX_PLANES]; + unsigned int PrefetchLinesY[DML2_MAX_PLANES]; + unsigned int PrefetchLinesC[DML2_MAX_PLANES]; + unsigned int MaxNumSwathY[DML2_MAX_PLANES]; /// <brief Max number of swath for prefetch + unsigned int MaxNumSwathC[DML2_MAX_PLANES]; /// <brief Max number of swath for prefetch + unsigned int PrefillY[DML2_MAX_PLANES]; + unsigned int PrefillC[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_l[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_c[DML2_MAX_PLANES]; + + bool use_one_row_for_frame[DML2_MAX_PLANES]; + bool use_one_row_for_frame_flip[DML2_MAX_PLANES]; + + double dst_y_prefetch[DML2_MAX_PLANES]; + double LinesForVM[DML2_MAX_PLANES]; + double LinesForDPTERow[DML2_MAX_PLANES]; + double SwathWidthYSingleDPP[DML2_MAX_PLANES]; + double SwathWidthCSingleDPP[DML2_MAX_PLANES]; + unsigned int BytePerPixelY[DML2_MAX_PLANES]; + unsigned int BytePerPixelC[DML2_MAX_PLANES]; + double BytePerPixelInDETY[DML2_MAX_PLANES]; + double BytePerPixelInDETC[DML2_MAX_PLANES]; + + unsigned int Read256BlockHeightY[DML2_MAX_PLANES]; + unsigned int Read256BlockWidthY[DML2_MAX_PLANES]; + unsigned int Read256BlockHeightC[DML2_MAX_PLANES]; + unsigned int Read256BlockWidthC[DML2_MAX_PLANES]; + unsigned int MacroTileHeightY[DML2_MAX_PLANES]; + unsigned int MacroTileHeightC[DML2_MAX_PLANES]; + unsigned int MacroTileWidthY[DML2_MAX_PLANES]; + unsigned int MacroTileWidthC[DML2_MAX_PLANES]; + + bool surf_linear128_l[DML2_MAX_PLANES]; + bool surf_linear128_c[DML2_MAX_PLANES]; + + double PSCL_FACTOR[DML2_MAX_PLANES]; + double PSCL_FACTOR_CHROMA[DML2_MAX_PLANES]; + double MaximumSwathWidthLuma[DML2_MAX_PLANES]; + double MaximumSwathWidthChroma[DML2_MAX_PLANES]; + double Tno_bw[DML2_MAX_PLANES]; + double Tno_bw_flip[DML2_MAX_PLANES]; + double dst_y_per_vm_flip[DML2_MAX_PLANES]; + double dst_y_per_row_flip[DML2_MAX_PLANES]; + double WritebackDelayTime[DML2_MAX_PLANES]; + unsigned int dpte_group_bytes[DML2_MAX_PLANES]; + unsigned int dpte_row_height[DML2_MAX_PLANES]; + unsigned int dpte_row_height_chroma[DML2_MAX_PLANES]; + double UrgLatency; + double TripToMemory; + double UrgentBurstFactorCursor[DML2_MAX_PLANES]; + double UrgentBurstFactorCursorPre[DML2_MAX_PLANES]; + double UrgentBurstFactorLuma[DML2_MAX_PLANES]; + double UrgentBurstFactorLumaPre[DML2_MAX_PLANES]; + double UrgentBurstFactorChroma[DML2_MAX_PLANES]; + double UrgentBurstFactorChromaPre[DML2_MAX_PLANES]; + double MaximumSwathWidthInLineBufferLuma; + double MaximumSwathWidthInLineBufferChroma; + double ExtraLatency; + double ExtraLatency_sr; + double ExtraLatencyPrefetch; + + double 
dcc_dram_bw_nom_overhead_factor_p0[DML2_MAX_PLANES]; // overhead to request meta + double dcc_dram_bw_nom_overhead_factor_p1[DML2_MAX_PLANES]; + double dcc_dram_bw_pref_overhead_factor_p0[DML2_MAX_PLANES]; // overhead to request meta + double dcc_dram_bw_pref_overhead_factor_p1[DML2_MAX_PLANES]; + double mall_prefetch_sdp_overhead_factor[DML2_MAX_PLANES]; // overhead to the imall or phantom pipe + double mall_prefetch_dram_overhead_factor[DML2_MAX_PLANES]; + + // Backend + bool RequiresDSC[DML2_MAX_PLANES]; + bool RequiresFEC[DML2_MAX_PLANES]; + double OutputBpp[DML2_MAX_PLANES]; + unsigned int DSCDelay[DML2_MAX_PLANES]; + enum dml2_core_internal_output_type OutputType[DML2_MAX_PLANES]; + enum dml2_core_internal_output_type_rate OutputRate[DML2_MAX_PLANES]; + + // Bandwidth Related Info + double BandwidthAvailableForImmediateFlip; + double SurfaceReadBandwidthLuma[DML2_MAX_PLANES]; // no dcc overhead, for the plane + double SurfaceReadBandwidthChroma[DML2_MAX_PLANES]; + double WriteBandwidth[DML2_MAX_PLANES]; + double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES]; + double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES]; + double cursor_bw[DML2_MAX_PLANES]; + double prefetch_cursor_bw[DML2_MAX_PLANES]; + double prefetch_vmrow_bw[DML2_MAX_PLANES]; + double final_flip_bw[DML2_MAX_PLANES]; + double meta_row_bw[DML2_MAX_PLANES]; + unsigned int meta_row_bytes[DML2_MAX_PLANES]; + double dpte_row_bw[DML2_MAX_PLANES]; + + // Something that should be feedback to caller + enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; + unsigned int SurfaceSizeInMALL[DML2_MAX_PLANES]; + unsigned int NoOfDPP[DML2_MAX_PLANES]; + bool MPCCombine[DML2_MAX_PLANES]; + double dcfclk_deepsleep; + double MinDPPCLKUsingSingleDPP[DML2_MAX_PLANES]; + bool SingleDPPViewportSizeSupportPerSurface[DML2_MAX_PLANES]; + bool ImmediateFlipSupportedForPipe[DML2_MAX_PLANES]; + bool NotEnoughUrgentLatencyHiding[DML2_MAX_PLANES]; + bool NotEnoughUrgentLatencyHidingPre[DML2_MAX_PLANES]; + bool PTEBufferSizeNotExceeded[DML2_MAX_PLANES]; + bool DCCMetaBufferSizeNotExceeded[DML2_MAX_PLANES]; + unsigned int TotalNumberOfActiveDPP; + unsigned int TotalNumberOfSingleDPPSurfaces; + unsigned int TotalNumberOfDCCActiveDPP; + unsigned int Total3dlutActive; + + unsigned int SubViewportLinesNeededInMALL[DML2_MAX_PLANES]; + double VActiveLatencyHidingMargin[DML2_MAX_PLANES]; + double VActiveLatencyHidingUs[DML2_MAX_PLANES]; + unsigned int MaxVStartupLines[DML2_MAX_PLANES]; + + unsigned int num_mcaches_l[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_l[DML2_MAX_PLANES]; + unsigned int mcache_offsets_l[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1]; + unsigned int mcache_shift_granularity_l[DML2_MAX_PLANES]; + + unsigned int num_mcaches_c[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_c[DML2_MAX_PLANES]; + unsigned int mcache_offsets_c[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1]; + unsigned int mcache_shift_granularity_c[DML2_MAX_PLANES]; + + bool mall_comb_mcache_l[DML2_MAX_PLANES]; + bool mall_comb_mcache_c[DML2_MAX_PLANES]; + bool lc_comb_mcache[DML2_MAX_PLANES]; + + +}; + +/// @brief A mega structure that houses various info for model programming step. 
+struct dml2_core_internal_mode_program { + unsigned int qos_param_index; // to access the uclk dependent dpm table + unsigned int active_min_uclk_dpm_index; // to access the min_clk table + double FabricClock; /// <brief Basically just the clock freq at the min (or given) state + double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting + double dram_bw_mbps; + double uclk_freq_mhz; + unsigned int NoOfDPP[DML2_MAX_PLANES]; + enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; + + //------------- + // Intermediate/Informational + //------------- + double UrgentLatency; + double TripToMemory; + double MetaTripToMemory; + unsigned int VInitPreFillY[DML2_MAX_PLANES]; + unsigned int VInitPreFillC[DML2_MAX_PLANES]; + unsigned int MaxNumSwathY[DML2_MAX_PLANES]; + unsigned int MaxNumSwathC[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_l[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_c[DML2_MAX_PLANES]; + + double BytePerPixelInDETY[DML2_MAX_PLANES]; + double BytePerPixelInDETC[DML2_MAX_PLANES]; + unsigned int BytePerPixelY[DML2_MAX_PLANES]; + unsigned int BytePerPixelC[DML2_MAX_PLANES]; + unsigned int SwathWidthY[DML2_MAX_PLANES]; + unsigned int SwathWidthC[DML2_MAX_PLANES]; + unsigned int req_per_swath_ub_l[DML2_MAX_PLANES]; + unsigned int req_per_swath_ub_c[DML2_MAX_PLANES]; + unsigned int SwathWidthSingleDPPY[DML2_MAX_PLANES]; + unsigned int SwathWidthSingleDPPC[DML2_MAX_PLANES]; + double SurfaceReadBandwidthLuma[DML2_MAX_PLANES]; + double SurfaceReadBandwidthChroma[DML2_MAX_PLANES]; + + unsigned int PixelPTEBytesPerRow[DML2_MAX_PLANES]; + unsigned int vm_bytes[DML2_MAX_PLANES]; + unsigned int PrefetchSourceLinesY[DML2_MAX_PLANES]; + double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES]; + double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES]; + unsigned int PrefetchSourceLinesC[DML2_MAX_PLANES]; + double PSCL_THROUGHPUT[DML2_MAX_PLANES]; + double PSCL_THROUGHPUT_CHROMA[DML2_MAX_PLANES]; + unsigned int DSCDelay[DML2_MAX_PLANES]; + double DPPCLKUsingSingleDPP[DML2_MAX_PLANES]; + + unsigned int Read256BlockHeightY[DML2_MAX_PLANES]; + unsigned int Read256BlockWidthY[DML2_MAX_PLANES]; + unsigned int Read256BlockHeightC[DML2_MAX_PLANES]; + unsigned int Read256BlockWidthC[DML2_MAX_PLANES]; + unsigned int MacroTileHeightY[DML2_MAX_PLANES]; + unsigned int MacroTileHeightC[DML2_MAX_PLANES]; + unsigned int MacroTileWidthY[DML2_MAX_PLANES]; + unsigned int MacroTileWidthC[DML2_MAX_PLANES]; + + bool surf_linear128_l[DML2_MAX_PLANES]; + bool surf_linear128_c[DML2_MAX_PLANES]; + + unsigned int SurfaceSizeInTheMALL[DML2_MAX_PLANES]; + double VRatioPrefetchY[DML2_MAX_PLANES]; + double VRatioPrefetchC[DML2_MAX_PLANES]; + double Tno_bw[DML2_MAX_PLANES]; + double Tno_bw_flip[DML2_MAX_PLANES]; + double final_flip_bw[DML2_MAX_PLANES]; + double prefetch_vmrow_bw[DML2_MAX_PLANES]; + double cursor_bw[DML2_MAX_PLANES]; + double prefetch_cursor_bw[DML2_MAX_PLANES]; + double WritebackDelay[DML2_MAX_PLANES]; + unsigned int dpte_row_height[DML2_MAX_PLANES]; + unsigned int dpte_row_height_linear[DML2_MAX_PLANES]; + unsigned int dpte_row_width_luma_ub[DML2_MAX_PLANES]; + unsigned int dpte_row_width_chroma_ub[DML2_MAX_PLANES]; + unsigned int dpte_row_height_chroma[DML2_MAX_PLANES]; + unsigned int dpte_row_height_linear_chroma[DML2_MAX_PLANES]; + unsigned int vm_group_bytes[DML2_MAX_PLANES]; + unsigned int dpte_group_bytes[DML2_MAX_PLANES]; + + double dpte_row_bw[DML2_MAX_PLANES]; + double time_per_tdlut_group[DML2_MAX_PLANES]; + double 
UrgentBurstFactorCursor[DML2_MAX_PLANES]; + double UrgentBurstFactorCursorPre[DML2_MAX_PLANES]; + double UrgentBurstFactorLuma[DML2_MAX_PLANES]; + double UrgentBurstFactorLumaPre[DML2_MAX_PLANES]; + double UrgentBurstFactorChroma[DML2_MAX_PLANES]; + double UrgentBurstFactorChromaPre[DML2_MAX_PLANES]; + + double meta_row_bw[DML2_MAX_PLANES]; + unsigned int meta_row_bytes[DML2_MAX_PLANES]; + unsigned int meta_req_width[DML2_MAX_PLANES]; + unsigned int meta_req_height[DML2_MAX_PLANES]; + unsigned int meta_row_width[DML2_MAX_PLANES]; + unsigned int meta_row_height[DML2_MAX_PLANES]; + unsigned int meta_req_width_chroma[DML2_MAX_PLANES]; + unsigned int meta_row_height_chroma[DML2_MAX_PLANES]; + unsigned int meta_row_width_chroma[DML2_MAX_PLANES]; + unsigned int meta_req_height_chroma[DML2_MAX_PLANES]; + + unsigned int swath_width_luma_ub[DML2_MAX_PLANES]; + unsigned int swath_width_chroma_ub[DML2_MAX_PLANES]; + unsigned int PixelPTEReqWidthY[DML2_MAX_PLANES]; + unsigned int PixelPTEReqHeightY[DML2_MAX_PLANES]; + unsigned int PTERequestSizeY[DML2_MAX_PLANES]; + unsigned int PixelPTEReqWidthC[DML2_MAX_PLANES]; + unsigned int PixelPTEReqHeightC[DML2_MAX_PLANES]; + unsigned int PTERequestSizeC[DML2_MAX_PLANES]; + + double TWait[DML2_MAX_PLANES]; + double Tdmdl_vm[DML2_MAX_PLANES]; + double Tdmdl[DML2_MAX_PLANES]; + double TSetup[DML2_MAX_PLANES]; + unsigned int dpde0_bytes_per_frame_ub_l[DML2_MAX_PLANES]; + unsigned int dpde0_bytes_per_frame_ub_c[DML2_MAX_PLANES]; + + unsigned int meta_pte_bytes_per_frame_ub_l[DML2_MAX_PLANES]; + unsigned int meta_pte_bytes_per_frame_ub_c[DML2_MAX_PLANES]; + + bool UnboundedRequestEnabled; + unsigned int CompressedBufferSizeInkByte; + unsigned int compbuf_reserved_space_64b; + bool hw_debug5; + unsigned int dcfclk_deep_sleep_hysteresis; + unsigned int min_return_latency_in_dcfclk; + + bool NotEnoughUrgentLatencyHiding[DML2_MAX_PLANES]; + bool NotEnoughUrgentLatencyHidingPre[DML2_MAX_PLANES]; + double ExtraLatency; + double ExtraLatency_sr; + double ExtraLatencyPrefetch; + bool PrefetchAndImmediateFlipSupported; + double TotalDataReadBandwidth; + double BandwidthAvailableForImmediateFlip; + bool NotEnoughTimeForDynamicMetadata[DML2_MAX_PLANES]; + + bool use_one_row_for_frame[DML2_MAX_PLANES]; + bool use_one_row_for_frame_flip[DML2_MAX_PLANES]; + + double TCalc; + unsigned int TotImmediateFlipBytes; + + // ------------------- + // Output + // ------------------- + unsigned int pipe_plane[DML2_MAX_PLANES]; // <brief used mainly by dv to map the pipe inst to plane index within DML core; the plane idx of a pipe + unsigned int num_active_pipes; + + bool NoTimeToPrefetch[DML2_MAX_PLANES]; // <brief Prefetch schedule calculation result + + // Support + bool UrgVactiveBandwidthSupport; + bool PrefetchModeSupported; // <brief Is the prefetch mode (bandwidth and latency) supported + bool ImmediateFlipSupported; + bool ImmediateFlipSupportedForPipe[DML2_MAX_PLANES]; + + // Clock + double Dcfclk; + double Dispclk; // <brief dispclk being used in mode programming + double Dppclk[DML2_MAX_PLANES]; // <brief dppclk being used in mode programming + double GlobalDPPCLK; + + double DSCCLK[DML2_MAX_PLANES]; //< brief Required DSCCLK freq. 
Backend; not used in any subsequent calculations for now + double DCFCLKDeepSleep; + + // ARB reg + bool DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; + struct dml2_core_internal_watermarks Watermark; + + // DCC compression control + unsigned int request_size_bytes_luma[DML2_MAX_PLANES]; + unsigned int request_size_bytes_chroma[DML2_MAX_PLANES]; + enum dml2_core_internal_request_type RequestLuma[DML2_MAX_PLANES]; + enum dml2_core_internal_request_type RequestChroma[DML2_MAX_PLANES]; + unsigned int DCCYMaxUncompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCYMaxCompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCYIndependentBlock[DML2_MAX_PLANES]; + unsigned int DCCCMaxUncompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCCMaxCompressedBlock[DML2_MAX_PLANES]; + unsigned int DCCCIndependentBlock[DML2_MAX_PLANES]; + + // Stutter Efficiency + double StutterEfficiency; + double StutterEfficiencyNotIncludingVBlank; + unsigned int NumberOfStutterBurstsPerFrame; + double Z8StutterEfficiency; + unsigned int Z8NumberOfStutterBurstsPerFrame; + double Z8StutterEfficiencyNotIncludingVBlank; + double StutterPeriod; + double Z8StutterEfficiencyBestCase; + unsigned int Z8NumberOfStutterBurstsPerFrameBestCase; + double Z8StutterEfficiencyNotIncludingVBlankBestCase; + double StutterPeriodBestCase; + + // DLG TTU reg + double MIN_DST_Y_NEXT_START[DML2_MAX_PLANES]; + bool VREADY_AT_OR_AFTER_VSYNC[DML2_MAX_PLANES]; + unsigned int DSTYAfterScaler[DML2_MAX_PLANES]; + unsigned int DSTXAfterScaler[DML2_MAX_PLANES]; + double dst_y_prefetch[DML2_MAX_PLANES]; + double dst_y_per_vm_vblank[DML2_MAX_PLANES]; + double dst_y_per_row_vblank[DML2_MAX_PLANES]; + double dst_y_per_vm_flip[DML2_MAX_PLANES]; + double dst_y_per_row_flip[DML2_MAX_PLANES]; + double MinTTUVBlank[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeLuma[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeChroma[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeLumaPrefetch[DML2_MAX_PLANES]; + double DisplayPipeLineDeliveryTimeChromaPrefetch[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeLuma[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeChroma[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeLumaPrefetch[DML2_MAX_PLANES]; + double DisplayPipeRequestDeliveryTimeChromaPrefetch[DML2_MAX_PLANES]; + unsigned int CursorDstXOffset[DML2_MAX_PLANES]; + unsigned int CursorDstYOffset[DML2_MAX_PLANES]; + unsigned int CursorChunkHDLAdjust[DML2_MAX_PLANES]; + + double DST_Y_PER_PTE_ROW_NOM_L[DML2_MAX_PLANES]; + double DST_Y_PER_PTE_ROW_NOM_C[DML2_MAX_PLANES]; + double time_per_pte_group_nom_luma[DML2_MAX_PLANES]; + double time_per_pte_group_nom_chroma[DML2_MAX_PLANES]; + double time_per_pte_group_vblank_luma[DML2_MAX_PLANES]; + double time_per_pte_group_vblank_chroma[DML2_MAX_PLANES]; + double time_per_pte_group_flip_luma[DML2_MAX_PLANES]; + double time_per_pte_group_flip_chroma[DML2_MAX_PLANES]; + double TimePerVMGroupVBlank[DML2_MAX_PLANES]; + double TimePerVMGroupFlip[DML2_MAX_PLANES]; + double TimePerVMRequestVBlank[DML2_MAX_PLANES]; + double TimePerVMRequestFlip[DML2_MAX_PLANES]; + + double DST_Y_PER_META_ROW_NOM_L[DML2_MAX_PLANES]; + double DST_Y_PER_META_ROW_NOM_C[DML2_MAX_PLANES]; + double TimePerMetaChunkNominal[DML2_MAX_PLANES]; + double TimePerChromaMetaChunkNominal[DML2_MAX_PLANES]; + double TimePerMetaChunkVBlank[DML2_MAX_PLANES]; + double TimePerChromaMetaChunkVBlank[DML2_MAX_PLANES]; + double TimePerMetaChunkFlip[DML2_MAX_PLANES]; + double TimePerChromaMetaChunkFlip[DML2_MAX_PLANES]; + + double FractionOfUrgentBandwidth; + 
double FractionOfUrgentBandwidthImmediateFlip; + double FractionOfUrgentBandwidthMALL; + + // RQ registers + bool PTE_BUFFER_MODE[DML2_MAX_PLANES]; + unsigned int BIGK_FRAGMENT_SIZE[DML2_MAX_PLANES]; + + unsigned int SubViewportLinesNeededInMALL[DML2_MAX_PLANES]; + bool is_using_mall_for_ss[DML2_MAX_PLANES]; + + // OTG + unsigned int VStartupMin[DML2_MAX_PLANES]; /// <brief Minimum vstartup to meet the prefetch schedule (i.e. the prefetch solution can be found at this vstartup time); not the actual global sync vstartup pos. + unsigned int VStartup[DML2_MAX_PLANES]; /// <brief The vstartup value for OTG programming (will set to max vstartup; but now bounded by min(vblank_nom. actual vblank)) + unsigned int VUpdateOffsetPix[DML2_MAX_PLANES]; + unsigned int VUpdateWidthPix[DML2_MAX_PLANES]; + unsigned int VReadyOffsetPix[DML2_MAX_PLANES]; + + // Latency and Support + double MaxActiveFCLKChangeLatencySupported; + bool USRRetrainingSupport; + bool g6_temp_read_support; + enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + enum dml2_dram_clock_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; + bool global_dram_clock_change_supported; + bool global_fclk_change_supported; + double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES]; + double WritebackAllowFCLKChangeEndPosition[DML2_MAX_PLANES]; + double WritebackAllowDRAMClockChangeEndPosition[DML2_MAX_PLANES]; + + // buffer sizing + unsigned int DETBufferSizeInKByte[DML2_MAX_PLANES]; // <brief Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value. + unsigned int DETBufferSizeY[DML2_MAX_PLANES]; + unsigned int DETBufferSizeC[DML2_MAX_PLANES]; + unsigned int SwathHeightY[DML2_MAX_PLANES]; + unsigned int SwathHeightC[DML2_MAX_PLANES]; + + double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // active bandwidth, scaled by urg burst factor + double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth, scaled by urg burst factor + double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth + flip + double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // same as urg_bandwidth, except not scaled by urg burst factor + double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + + double avg_bandwidth_available_min[dml2_core_internal_soc_state_max]; + double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double urg_bandwidth_available_min[dml2_core_internal_soc_state_max]; // min between SDP and DRAM + double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_vm_only bw, sdp has no different derate for vm/non-vm traffic etc. + double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_pixel_and_vm bw, sdp has no different derate for vm/non-vm etc. 
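A minimal sketch of how the per-[soc_state][bw_type] arrays above are typically consumed, assuming the support decision reduces to comparing required against available urgent bandwidth for every state and type (the real dml2 core check is more involved and also weighs the flip and non-urgent variants; struct field prefixes are omitted here):

	bool bw_ok = true;
	unsigned int s, t;

	for (s = 0; s < dml2_core_internal_soc_state_max; s++) {
		for (t = 0; t < dml2_core_internal_bw_max; t++)
			/* urgent demand must fit within the derated bandwidth available for this state/type */
			bw_ok = bw_ok && (urg_bandwidth_required[s][t] <= urg_bandwidth_available[s][t]);
	}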
+ + double dcc_dram_bw_nom_overhead_factor_p0[DML2_MAX_PLANES]; + double dcc_dram_bw_nom_overhead_factor_p1[DML2_MAX_PLANES]; + double dcc_dram_bw_pref_overhead_factor_p0[DML2_MAX_PLANES]; + double dcc_dram_bw_pref_overhead_factor_p1[DML2_MAX_PLANES]; + double mall_prefetch_sdp_overhead_factor[DML2_MAX_PLANES]; + double mall_prefetch_dram_overhead_factor[DML2_MAX_PLANES]; + + unsigned int num_mcaches_l[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_l[DML2_MAX_PLANES]; + unsigned int mcache_offsets_l[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1]; + unsigned int mcache_shift_granularity_l[DML2_MAX_PLANES]; + + unsigned int num_mcaches_c[DML2_MAX_PLANES]; + unsigned int mcache_row_bytes_c[DML2_MAX_PLANES]; + unsigned int mcache_offsets_c[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1]; + unsigned int mcache_shift_granularity_c[DML2_MAX_PLANES]; + + bool mall_comb_mcache_l[DML2_MAX_PLANES]; + bool mall_comb_mcache_c[DML2_MAX_PLANES]; + bool lc_comb_mcache[DML2_MAX_PLANES]; +}; + +struct dml2_core_internal_SOCParametersList { + double UrgentLatency; + double ExtraLatency_sr; + double ExtraLatency; + double WritebackLatency; + double DRAMClockChangeLatency; + double FCLKChangeLatency; + double SRExitTime; + double SREnterPlusExitTime; + double SRExitZ8Time; + double SREnterPlusExitZ8Time; + double USRRetrainingLatency; + double SMNLatency; + double g6_temp_read_blackout_us; +}; + +struct dml2_core_calcs_mode_support_locals { + double PixelClockBackEnd[DML2_MAX_PLANES]; + double OutputBpp[DML2_MAX_PLANES]; + + unsigned int meta_row_height_luma[DML2_MAX_PLANES]; + unsigned int meta_row_height_chroma[DML2_MAX_PLANES]; + + bool dummy_boolean[2]; + unsigned int dummy_integer[3]; + unsigned int dummy_integer_array[36][DML2_MAX_PLANES]; + enum dml2_odm_mode dummy_odm_mode[DML2_MAX_PLANES]; + bool dummy_boolean_array[2][DML2_MAX_PLANES]; + double dummy_single[3]; + double dummy_single_array[DML2_MAX_PLANES]; + struct dml2_core_internal_watermarks dummy_watermark; + double dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + + unsigned int MaximumVStartup[DML2_MAX_PLANES]; + unsigned int DSTYAfterScaler[DML2_MAX_PLANES]; + unsigned int DSTXAfterScaler[DML2_MAX_PLANES]; + struct dml2_core_internal_SOCParametersList mSOCParameters; + struct dml2_core_internal_DmlPipe myPipe; + struct dml2_core_internal_DmlPipe SurfParameters[DML2_MAX_PLANES]; + unsigned int TotalNumberOfActiveWriteback; + unsigned int MaximumSwathWidthSupportLuma; + unsigned int MaximumSwathWidthSupportChroma; + bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; + bool MPCCombineMethodAsPossible; + bool TotalAvailablePipesSupportNoDSC; + unsigned int NumberOfDPPNoDSC; + enum dml2_odm_mode ODMModeNoDSC; + double RequiredDISPCLKPerSurfaceNoDSC; + bool TotalAvailablePipesSupportDSC; + unsigned int NumberOfDPPDSC; + enum dml2_odm_mode ODMModeDSC; + double RequiredDISPCLKPerSurfaceDSC; + double BWOfNonCombinedSurfaceOfMaximumBandwidth; + unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth; + unsigned int TotalNumberOfActiveOTG; + unsigned int TotalNumberOfActiveHDMIFRL; + unsigned int TotalNumberOfActiveDP2p0; + unsigned int TotalNumberOfActiveDP2p0Outputs; + unsigned int TotalSlots; + unsigned int DSCFormatFactor; + unsigned int TotalDSCUnitsRequired; + unsigned int ReorderingBytes; + bool ImmediateFlipRequired; + bool FullFrameMALLPStateMethod; + bool SubViewportMALLPStateMethod; + bool PhantomPipeMALLPStateMethod; + bool SubViewportMALLRefreshGreaterThan120Hz; + + double HostVMInefficiencyFactor; + double 
HostVMInefficiencyFactorPrefetch; + unsigned int NextMaxVStartup; + unsigned int MaxVStartup; + bool AnyLinesForVMOrRowTooLarge; + double PixelClockBackEndFactor; + unsigned int NumDSCUnitRequired; + + double Tvm_trips[DML2_MAX_PLANES]; + double Tr0_trips[DML2_MAX_PLANES]; + double Tvm_trips_flip[DML2_MAX_PLANES]; + double Tr0_trips_flip[DML2_MAX_PLANES]; + double Tvm_trips_flip_rounded[DML2_MAX_PLANES]; + double Tr0_trips_flip_rounded[DML2_MAX_PLANES]; + unsigned int per_pipe_flip_bytes[DML2_MAX_PLANES]; + + unsigned int vmpg_width_y[DML2_MAX_PLANES]; + unsigned int vmpg_height_y[DML2_MAX_PLANES]; + unsigned int vmpg_width_c[DML2_MAX_PLANES]; + unsigned int vmpg_height_c[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_l[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_c[DML2_MAX_PLANES]; + + unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_row_bytes[DML2_MAX_PLANES]; + unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; + double tdlut_opt_time[DML2_MAX_PLANES]; + double tdlut_drain_time[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; + + unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes_per_line[DML2_MAX_PLANES]; + unsigned int cursor_lines_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes[DML2_MAX_PLANES]; +}; + +struct dml2_core_calcs_mode_programming_locals { + double PixelClockBackEnd[DML2_MAX_PLANES]; + double OutputBpp[DML2_MAX_PLANES]; + unsigned int num_active_planes; // <brief As determined by either e2e_pipe_param or display_cfg + unsigned int MaxTotalDETInKByte; + unsigned int NomDETInKByte; + unsigned int MinCompressedBufferSizeInKByte; + double SOCCLK; /// <brief Basically just the clock freq at the min (or given) state + + double dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; + unsigned int dummy_integer_array[2][DML2_MAX_PLANES]; + enum dml2_output_encoder_class dummy_output_encoder_array[DML2_MAX_PLANES]; + double dummy_single_array[2][DML2_MAX_PLANES]; + unsigned int dummy_long_array[4][DML2_MAX_PLANES]; + bool dummy_boolean_array[2][DML2_MAX_PLANES]; + bool dummy_boolean[2]; + double dummy_single[2]; + struct dml2_core_internal_watermarks dummy_watermark; + + unsigned int DSCFormatFactor; + struct dml2_core_internal_DmlPipe SurfaceParameters[DML2_MAX_PLANES]; + unsigned int ReorderBytes; + double HostVMInefficiencyFactor; + double HostVMInefficiencyFactorPrefetch; + unsigned int TotalDCCActiveDPP; + unsigned int TotalActiveDPP; + unsigned int Total3dlutActive; + unsigned int MaxVStartupLines[DML2_MAX_PLANES]; /// <brief more like vblank for the plane's OTG + bool immediate_flip_required; // any pipes need immediate flip + bool DestinationLineTimesForPrefetchLessThan2; + bool VRatioPrefetchMoreThanMax; + double MaxTotalRDBandwidthNotIncludingMALLPrefetch; + struct dml2_core_internal_SOCParametersList mmSOCParameters; + double Tvstartup_margin; + double dlg_vblank_start; + double LSetup; + double blank_lines_remaining; + double TotalWRBandwidth; + double WRBandwidth; + struct dml2_core_internal_DmlPipe myPipe; + double PixelClockBackEndFactor; + unsigned int vmpg_width_y[DML2_MAX_PLANES]; + unsigned int vmpg_height_y[DML2_MAX_PLANES]; + unsigned int vmpg_width_c[DML2_MAX_PLANES]; + unsigned int vmpg_height_c[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_l[DML2_MAX_PLANES]; + unsigned int full_swath_bytes_c[DML2_MAX_PLANES]; + + unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES]; + unsigned 
int tdlut_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_row_bytes[DML2_MAX_PLANES]; + unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; + double tdlut_opt_time[DML2_MAX_PLANES]; + double tdlut_drain_time[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; + + unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes_per_line[DML2_MAX_PLANES]; + unsigned int cursor_lines_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes[DML2_MAX_PLANES]; + + double Tvm_trips[DML2_MAX_PLANES]; + double Tr0_trips[DML2_MAX_PLANES]; + double Tvm_trips_flip[DML2_MAX_PLANES]; + double Tr0_trips_flip[DML2_MAX_PLANES]; + double Tvm_trips_flip_rounded[DML2_MAX_PLANES]; + double Tr0_trips_flip_rounded[DML2_MAX_PLANES]; + unsigned int per_pipe_flip_bytes[DML2_MAX_PLANES]; +}; + +struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals { + double ActiveDRAMClockChangeLatencyMargin[DML2_MAX_PLANES]; + double ActiveFCLKChangeLatencyMargin[DML2_MAX_PLANES]; + double USRRetrainingLatencyMargin[DML2_MAX_PLANES]; + double g6_temp_read_latency_margin[DML2_MAX_PLANES]; + + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double LinesInDETY[DML2_MAX_PLANES]; + double LinesInDETC[DML2_MAX_PLANES]; + unsigned int LinesInDETYRoundedDownToSwath[DML2_MAX_PLANES]; + unsigned int LinesInDETCRoundedDownToSwath[DML2_MAX_PLANES]; + double FullDETBufferingTimeY; + double FullDETBufferingTimeC; + double WritebackDRAMClockChangeLatencyMargin; + double WritebackFCLKChangeLatencyMargin; + double WritebackLatencyHiding; + + unsigned int TotalActiveWriteback; + unsigned int LBLatencyHidingSourceLinesY[DML2_MAX_PLANES]; + unsigned int LBLatencyHidingSourceLinesC[DML2_MAX_PLANES]; + double TotalPixelBW; + double EffectiveDETBufferSizeY; + double ActiveClockChangeLatencyHidingY; + double ActiveClockChangeLatencyHidingC; + double ActiveClockChangeLatencyHiding; + unsigned int dst_y_pstate; + unsigned int src_y_pstate_l; + unsigned int src_y_pstate_c; + unsigned int src_y_ahead_l; + unsigned int src_y_ahead_c; + unsigned int sub_vp_lines_l; + unsigned int sub_vp_lines_c; + +}; + +struct dml2_core_calcs_CalculateVMRowAndSwath_locals { + unsigned int PTEBufferSizeInRequestsForLuma[DML2_MAX_PLANES]; + unsigned int PTEBufferSizeInRequestsForChroma[DML2_MAX_PLANES]; + unsigned int vm_bytes_l; + unsigned int vm_bytes_c; + unsigned int PixelPTEBytesPerRowY[DML2_MAX_PLANES]; + unsigned int PixelPTEBytesPerRowC[DML2_MAX_PLANES]; + unsigned int PixelPTEBytesPerRowStorageY[DML2_MAX_PLANES]; + unsigned int PixelPTEBytesPerRowStorageC[DML2_MAX_PLANES]; + unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DML2_MAX_PLANES]; + unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DML2_MAX_PLANES]; + unsigned int dpte_row_width_luma_ub_one_row_per_frame[DML2_MAX_PLANES]; + unsigned int dpte_row_height_luma_one_row_per_frame[DML2_MAX_PLANES]; + unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DML2_MAX_PLANES]; + unsigned int dpte_row_height_chroma_one_row_per_frame[DML2_MAX_PLANES]; + bool one_row_per_frame_fits_in_buffer[DML2_MAX_PLANES]; + unsigned int HostVMDynamicLevels; + unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES]; +}; + +struct dml2_core_calcs_CalculateVMRowAndSwath_params { + const struct dml2_display_cfg *display_cfg; + unsigned int NumberOfActiveSurfaces; + struct dml2_core_internal_DmlPipe *myPipe; + unsigned int *SurfaceSizeInMALL; + unsigned int 
PTEBufferSizeInRequestsLuma; + unsigned int PTEBufferSizeInRequestsChroma; + unsigned int MALLAllocatedForDCN; + unsigned int *SwathWidthY; + unsigned int *SwathWidthC; + unsigned int HostVMMinPageSize; + unsigned int DCCMetaBufferSizeBytes; + bool mrq_present; + + // Output + bool *PTEBufferSizeNotExceeded; + bool *DCCMetaBufferSizeNotExceeded; + + unsigned int *dpte_row_width_luma_ub; + unsigned int *dpte_row_width_chroma_ub; + unsigned int *dpte_row_height_luma; + unsigned int *dpte_row_height_chroma; + unsigned int *dpte_row_height_linear_luma; // VBA_DELTA + unsigned int *dpte_row_height_linear_chroma; // VBA_DELTA + + unsigned int *vm_group_bytes; + unsigned int *dpte_group_bytes; + unsigned int *PixelPTEReqWidthY; + unsigned int *PixelPTEReqHeightY; + unsigned int *PTERequestSizeY; + unsigned int *vmpg_width_y; + unsigned int *vmpg_height_y; + + unsigned int *PixelPTEReqWidthC; + unsigned int *PixelPTEReqHeightC; + unsigned int *PTERequestSizeC; + unsigned int *vmpg_width_c; + unsigned int *vmpg_height_c; + + unsigned int *dpde0_bytes_per_frame_ub_l; + unsigned int *dpde0_bytes_per_frame_ub_c; + + unsigned int *PrefetchSourceLinesY; + unsigned int *PrefetchSourceLinesC; + unsigned int *VInitPreFillY; + unsigned int *VInitPreFillC; + unsigned int *MaxNumSwathY; + unsigned int *MaxNumSwathC; + double *dpte_row_bw; + unsigned int *PixelPTEBytesPerRow; + unsigned int *vm_bytes; + bool *use_one_row_for_frame; + bool *use_one_row_for_frame_flip; + bool *is_using_mall_for_ss; + bool *PTE_BUFFER_MODE; + unsigned int *BIGK_FRAGMENT_SIZE; + + // MRQ + unsigned int *meta_req_width_luma; + unsigned int *meta_req_height_luma; + unsigned int *meta_row_width_luma; + unsigned int *meta_row_height_luma; + unsigned int *meta_pte_bytes_per_frame_ub_l; + + unsigned int *meta_req_width_chroma; + unsigned int *meta_req_height_chroma; + unsigned int *meta_row_width_chroma; + unsigned int *meta_row_height_chroma; + unsigned int *meta_pte_bytes_per_frame_ub_c; + double *meta_row_bw; + unsigned int *meta_row_bytes; +}; + +struct dml2_core_calcs_CalculatePrefetchSchedule_locals { + bool NoTimeToPrefetch; + unsigned int DPPCycles; + unsigned int DISPCLKCycles; + double DSTTotalPixelsAfterScaler; + double LineTime; + double dst_y_prefetch_equ; + double prefetch_bw_oto; + double Tvm_oto; + double Tr0_oto; + double Tvm_oto_lines; + double Tr0_oto_lines; + double dst_y_prefetch_oto; + double TimeForFetchingVM; + double TimeForFetchingRowInVBlank; + double LinesToRequestPrefetchPixelData; + unsigned int HostVMDynamicLevelsTrips; + double trip_to_mem; + double Tvm_trips_rounded; + double Tr0_trips_rounded; + double max_Tsw; + double Lsw_oto; + double Tpre_rounded; + double prefetch_bw_equ; + double Tvm_equ; + double Tr0_equ; + double Tdmbf; + double Tdmec; + double Tdmsks; + double prefetch_sw_bytes; + double prefetch_bw_pr; + double bytes_pp; + double dep_bytes; + double min_Lsw_oto; + double Tsw_est1; + double Tsw_est3; + double prefetch_bw1; + double prefetch_bw2; + double prefetch_bw3; + double prefetch_bw4; + + double TWait_p; + unsigned int cursor_prefetch_bytes; +}; + +struct dml2_core_shared_calculate_det_buffer_size_params { + const struct dml2_display_cfg *display_cfg; + bool ForceSingleDPP; + unsigned int NumberOfActiveSurfaces; + bool UnboundedRequestEnabled; + unsigned int nomDETInKByte; + unsigned int MaxTotalDETInKByte; + unsigned int ConfigReturnBufferSizeInKByte; + unsigned int MinCompressedBufferSizeInKByte; + unsigned int ConfigReturnBufferSegmentSizeInkByte; + unsigned int 
CompressedBufferSegmentSizeInkByte; + double *ReadBandwidthLuma; + double *ReadBandwidthChroma; + unsigned int *full_swath_bytes_l; + unsigned int *full_swath_bytes_c; + unsigned int *swath_time_value_us; + unsigned int *DPPPerSurface; + bool TryToAllocateForWriteLatency; + unsigned int bestEffortMinActiveLatencyHidingUs; + + // Output + unsigned int *DETBufferSizeInKByte; + unsigned int *CompressedBufferSizeInkByte; +}; + +struct dml2_core_shared_calculate_vm_and_row_bytes_params { + bool ViewportStationary; + bool DCCEnable; + unsigned int NumberOfDPPs; + unsigned int BlockHeight256Bytes; + unsigned int BlockWidth256Bytes; + enum dml2_source_format_class SourcePixelFormat; + unsigned int SurfaceTiling; + unsigned int BytePerPixel; + enum dml2_rotation_angle RotationAngle; + unsigned int SwathWidth; // per pipe + unsigned int ViewportHeight; + unsigned int ViewportXStart; + unsigned int ViewportYStart; + bool GPUVMEnable; + unsigned int GPUVMMaxPageTableLevels; + unsigned int GPUVMMinPageSizeKBytes; + unsigned int PTEBufferSizeInRequests; + unsigned int Pitch; + unsigned int MacroTileWidth; + unsigned int MacroTileHeight; + bool is_phantom; + unsigned int DCCMetaPitch; + bool mrq_present; + + // Output + unsigned int *PixelPTEBytesPerRow; // for bandwidth calculation + unsigned int *PixelPTEBytesPerRowStorage; // for PTE buffer size check + unsigned int *dpte_row_width_ub; + unsigned int *dpte_row_height; + unsigned int *dpte_row_height_linear; + unsigned int *PixelPTEBytesPerRow_one_row_per_frame; + unsigned int *dpte_row_width_ub_one_row_per_frame; + unsigned int *dpte_row_height_one_row_per_frame; + unsigned int *vmpg_width; + unsigned int *vmpg_height; + unsigned int *PixelPTEReqWidth; + unsigned int *PixelPTEReqHeight; + unsigned int *PTERequestSize; + unsigned int *dpde0_bytes_per_frame_ub; + + unsigned int *meta_row_bytes; + unsigned int *MetaRequestWidth; + unsigned int *MetaRequestHeight; + unsigned int *meta_row_width; + unsigned int *meta_row_height; + unsigned int *meta_pte_bytes_per_frame_ub; +}; + +struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals { + unsigned int MaximumSwathHeightY[DML2_MAX_PLANES]; + unsigned int MaximumSwathHeightC[DML2_MAX_PLANES]; + unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES]; + unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES]; + unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES]; + unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES]; + unsigned int SwathTimeValueUs[DML2_MAX_PLANES]; + + struct dml2_core_shared_calculate_det_buffer_size_params calculate_det_buffer_size_params; +}; + +struct dml2_core_shared_TruncToValidBPP_locals { +}; + +struct dml2_core_shared_CalculateDETBufferSize_locals { + unsigned int DETBufferSizePoolInKByte; + unsigned int NextDETBufferPieceInKByte; + unsigned int NextSurfaceToAssignDETPiece; + double TotalBandwidth; + double BandwidthOfSurfacesNotAssignedDETPiece; + unsigned int max_minDET; + unsigned int minDET; + unsigned int minDET_pipe; + unsigned int TotalBandwidthPerStream[DML2_MAX_PLANES]; + unsigned int TotalPixelRate; + unsigned int DETBudgetPerStream[DML2_MAX_PLANES]; + unsigned int RemainingDETBudgetPerStream[DML2_MAX_PLANES]; + unsigned int IdealDETBudget, DeltaDETBudget; + unsigned int ResidualDETAfterRounding; +}; + +struct dml2_core_shared_get_urgent_bandwidth_required_locals { + double required_bandwidth_mbps; + double required_bandwidth_mbps_this_surface; + double adj_factor_p0; + double adj_factor_p1; + double adj_factor_cur; + double adj_factor_p0_pre; + double 
adj_factor_p1_pre; + double adj_factor_cur_pre; + double per_plane_flip_bw[DML2_MAX_PLANES]; + double mall_svp_prefetch_factor; + double tmp_nom_adj_factor_p0; + double tmp_nom_adj_factor_p1; + double tmp_pref_adj_factor_p0; + double tmp_pref_adj_factor_p1; +}; + +struct dml2_core_shared_calculate_peak_bandwidth_required_locals { + double unity_array[DML2_MAX_PLANES]; + double zero_array[DML2_MAX_PLANES]; +}; + +struct dml2_core_shared_CalculateFlipSchedule_locals { + double min_row_time; + double Tvm_flip; + double Tr0_flip; + double ImmediateFlipBW; + double dpte_row_bytes; + double min_row_height; + double min_row_height_chroma; + double max_flip_time; + double lb_flip_bw; + double hvm_scaled_vm_bytes; + double num_rows; + double hvm_scaled_row_bytes; + double hvm_scaled_vm_row_bytes; + bool dual_plane; +}; + +struct dml2_core_shared_rq_dlg_get_dlg_reg_locals { + unsigned int plane_idx; + enum dml2_source_format_class source_format; + const struct dml2_timing_cfg *timing; + bool dual_plane; + enum dml2_odm_mode odm_mode; + + unsigned int htotal; + unsigned int hactive; + unsigned int hblank_end; + unsigned int vblank_end; + bool interlaced; + double pclk_freq_in_mhz; + double refclk_freq_in_mhz; + double ref_freq_to_pix_freq; + + unsigned int num_active_pipes; + unsigned int first_pipe_idx_in_plane; + unsigned int pipe_idx_in_combine; + unsigned int odm_combine_factor; + + double min_ttu_vblank; + unsigned int min_dst_y_next_start; + + unsigned int vready_after_vcount0; + + unsigned int dst_x_after_scaler; + unsigned int dst_y_after_scaler; + + double dst_y_prefetch; + double dst_y_per_vm_vblank; + double dst_y_per_row_vblank; + double dst_y_per_vm_flip; + double dst_y_per_row_flip; + + double max_dst_y_per_vm_vblank; + double max_dst_y_per_row_vblank; + + double vratio_pre_l; + double vratio_pre_c; + + double refcyc_per_line_delivery_pre_l; + double refcyc_per_line_delivery_l; + + double refcyc_per_line_delivery_pre_c; + double refcyc_per_line_delivery_c; + + double refcyc_per_req_delivery_pre_l; + double refcyc_per_req_delivery_l; + + double refcyc_per_req_delivery_pre_c; + double refcyc_per_req_delivery_c; + + double dst_y_per_pte_row_nom_l; + double dst_y_per_pte_row_nom_c; + double refcyc_per_pte_group_nom_l; + double refcyc_per_pte_group_nom_c; + double refcyc_per_pte_group_vblank_l; + double refcyc_per_pte_group_vblank_c; + double refcyc_per_pte_group_flip_l; + double refcyc_per_pte_group_flip_c; + double refcyc_per_tdlut_group; + + double dst_y_per_meta_row_nom_l; + double dst_y_per_meta_row_nom_c; + double refcyc_per_meta_chunk_nom_l; + double refcyc_per_meta_chunk_nom_c; + double refcyc_per_meta_chunk_vblank_l; + double refcyc_per_meta_chunk_vblank_c; + double refcyc_per_meta_chunk_flip_l; + double refcyc_per_meta_chunk_flip_c; +}; + +struct dml2_core_shared_CalculateMetaAndPTETimes_params { + struct dml2_core_internal_scratch *scratch; + const struct dml2_display_cfg *display_cfg; + unsigned int NumberOfActiveSurfaces; + bool *use_one_row_for_frame; + double *dst_y_per_row_vblank; + double *dst_y_per_row_flip; + unsigned int *BytePerPixelY; + unsigned int *BytePerPixelC; + unsigned int *dpte_row_height; + unsigned int *dpte_row_height_chroma; + unsigned int *dpte_group_bytes; + unsigned int *PTERequestSizeY; + unsigned int *PTERequestSizeC; + unsigned int *PixelPTEReqWidthY; + unsigned int *PixelPTEReqHeightY; + unsigned int *PixelPTEReqWidthC; + unsigned int *PixelPTEReqHeightC; + unsigned int *dpte_row_width_luma_ub; + unsigned int *dpte_row_width_chroma_ub; + unsigned int 
*tdlut_groups_per_2row_ub; + bool mrq_present; + unsigned int MetaChunkSize; + unsigned int MinMetaChunkSizeBytes; + unsigned int *meta_row_width; + unsigned int *meta_row_width_chroma; + unsigned int *meta_row_height; + unsigned int *meta_row_height_chroma; + unsigned int *meta_req_width; + unsigned int *meta_req_width_chroma; + unsigned int *meta_req_height; + unsigned int *meta_req_height_chroma; + + // Output + double *time_per_tdlut_group; + double *DST_Y_PER_PTE_ROW_NOM_L; + double *DST_Y_PER_PTE_ROW_NOM_C; + double *time_per_pte_group_nom_luma; + double *time_per_pte_group_vblank_luma; + double *time_per_pte_group_flip_luma; + double *time_per_pte_group_nom_chroma; + double *time_per_pte_group_vblank_chroma; + double *time_per_pte_group_flip_chroma; + + double *DST_Y_PER_META_ROW_NOM_L; + double *DST_Y_PER_META_ROW_NOM_C; + + double *TimePerMetaChunkNominal; + double *TimePerChromaMetaChunkNominal; + double *TimePerMetaChunkVBlank; + double *TimePerChromaMetaChunkVBlank; + double *TimePerMetaChunkFlip; + double *TimePerChromaMetaChunkFlip; +}; + +struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params { + const struct dml2_display_cfg *display_cfg; + bool USRRetrainingRequired; + unsigned int NumberOfActiveSurfaces; + unsigned int MaxLineBufferLines; + unsigned int LineBufferSize; + unsigned int WritebackInterfaceBufferSize; + double DCFCLK; + double ReturnBW; + bool SynchronizeTimings; + bool SynchronizeDRRDisplaysForUCLKPStateChange; + unsigned int *dpte_group_bytes; + struct dml2_core_internal_SOCParametersList mmSOCParameters; + unsigned int WritebackChunkSize; + double SOCCLK; + double DCFClkDeepSleep; + unsigned int *DETBufferSizeY; + unsigned int *DETBufferSizeC; + unsigned int *SwathHeightY; + unsigned int *SwathHeightC; + unsigned int *SwathWidthY; + unsigned int *SwathWidthC; + unsigned int *DPPPerSurface; + double *BytePerPixelDETY; + double *BytePerPixelDETC; + unsigned int *DSTXAfterScaler; + unsigned int *DSTYAfterScaler; + bool UnboundedRequestEnabled; + unsigned int CompressedBufferSizeInkByte; + bool max_oustanding_when_urgent_expected; + unsigned int max_outstanding_requests; + unsigned int max_request_size_bytes; + unsigned int *meta_row_height_l; + unsigned int *meta_row_height_c; + + // Output + struct dml2_core_internal_watermarks *Watermark; + enum dml2_dram_clock_change_support *DRAMClockChangeSupport; + bool *global_dram_clock_change_supported; + double *MaxActiveDRAMClockChangeLatencySupported; + unsigned int *SubViewportLinesNeededInMALL; + enum dml2_fclock_change_support *FCLKChangeSupport; + bool *global_fclk_change_supported; + double *MaxActiveFCLKChangeLatencySupported; + bool *USRRetrainingSupport; + double *VActiveLatencyHidingMargin; + double *VActiveLatencyHidingUs; + bool *g6_temp_read_support; +}; + + +struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params { + const struct dml2_display_cfg *display_cfg; + unsigned int ConfigReturnBufferSizeInKByte; + unsigned int MaxTotalDETInKByte; + unsigned int MinCompressedBufferSizeInKByte; + unsigned int rob_buffer_size_kbytes; + unsigned int pixel_chunk_size_kbytes; + bool ForceSingleDPP; + unsigned int NumberOfActiveSurfaces; + unsigned int nomDETInKByte; + unsigned int ConfigReturnBufferSegmentSizeInkByte; + unsigned int CompressedBufferSegmentSizeInkByte; + double *ReadBandwidthLuma; + double *ReadBandwidthChroma; + double *MaximumSwathWidthLuma; + double *MaximumSwathWidthChroma; + unsigned int *Read256BytesBlockHeightY; + unsigned int *Read256BytesBlockHeightC; + 
unsigned int *Read256BytesBlockWidthY; + unsigned int *Read256BytesBlockWidthC; + bool *surf_linear128_l; + bool *surf_linear128_c; + enum dml2_odm_mode *ODMMode; + unsigned int *BytePerPixY; + unsigned int *BytePerPixC; + double *BytePerPixDETY; + double *BytePerPixDETC; + unsigned int *DPPPerSurface; + bool mrq_present; + + // output + unsigned int *req_per_swath_ub_l; + unsigned int *req_per_swath_ub_c; + unsigned int *swath_width_luma_ub; + unsigned int *swath_width_chroma_ub; + unsigned int *SwathWidth; + unsigned int *SwathWidthChroma; + unsigned int *SwathHeightY; + unsigned int *SwathHeightC; + unsigned int *request_size_bytes_luma; + unsigned int *request_size_bytes_chroma; + unsigned int *DETBufferSizeInKByte; + unsigned int *DETBufferSizeY; + unsigned int *DETBufferSizeC; + unsigned int *full_swath_bytes_l; + unsigned int *full_swath_bytes_c; + bool *UnboundedRequestEnabled; + unsigned int *compbuf_reserved_space_64b; + unsigned int *CompressedBufferSizeInkByte; + bool *ViewportSizeSupportPerSurface; + bool *ViewportSizeSupport; + bool *hw_debug5; + + struct dml2_core_shared_calculation_funcs *funcs; +}; + +struct dml2_core_calcs_CalculateStutterEfficiency_locals { + double DETBufferingTimeY; + double SwathWidthYCriticalSurface; + double SwathHeightYCriticalSurface; + double VActiveTimeCriticalSurface; + double FrameTimeCriticalSurface; + unsigned int BytePerPixelYCriticalSurface; + unsigned int DETBufferSizeYCriticalSurface; + double MinTTUVBlankCriticalSurface; + unsigned int BlockWidth256BytesYCriticalSurface; + bool SinglePlaneCriticalSurface; + bool SinglePipeCriticalSurface; + double TotalCompressedReadBandwidth; + double TotalRowReadBandwidth; + double AverageDCCCompressionRate; + double EffectiveCompressedBufferSize; + double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; + double StutterBurstTime; + unsigned int TotalActiveWriteback; + double LinesInDETY; + double LinesInDETYRoundedDownToSwath; + double MaximumEffectiveCompressionLuma; + double MaximumEffectiveCompressionChroma; + double TotalZeroSizeRequestReadBandwidth; + double TotalZeroSizeCompressedReadBandwidth; + double AverageDCCZeroSizeFraction; + double AverageZeroSizeCompressionRate; +}; + +struct dml2_core_calcs_CalculateStutterEfficiency_params { + const struct dml2_display_cfg *display_cfg; + unsigned int CompressedBufferSizeInkByte; + bool UnboundedRequestEnabled; + unsigned int MetaFIFOSizeInKEntries; + unsigned int ZeroSizeBufferEntries; + unsigned int PixelChunkSizeInKByte; + unsigned int NumberOfActiveSurfaces; + unsigned int ROBBufferSizeInKByte; + double TotalDataReadBandwidth; + double DCFCLK; + double ReturnBW; + unsigned int CompbufReservedSpace64B; + unsigned int CompbufReservedSpaceZs; + bool hw_debug5; + double SRExitTime; + double SRExitZ8Time; + bool SynchronizeTimings; + double StutterEnterPlusExitWatermark; + double Z8StutterEnterPlusExitWatermark; + bool ProgressiveToInterlaceUnitInOPP; + double *MinTTUVBlank; + unsigned int *DPPPerSurface; + unsigned int *DETBufferSizeY; + unsigned int *BytePerPixelY; + double *BytePerPixelDETY; + unsigned int *SwathWidthY; + unsigned int *SwathHeightY; + unsigned int *SwathHeightC; + unsigned int *BlockHeight256BytesY; + unsigned int *BlockWidth256BytesY; + unsigned int *BlockHeight256BytesC; + unsigned int *BlockWidth256BytesC; + unsigned int *DCCYMaxUncompressedBlock; + unsigned int *DCCCMaxUncompressedBlock; + double *ReadBandwidthSurfaceLuma; + double *ReadBandwidthSurfaceChroma; + double *meta_row_bw; + double *dpte_row_bw; + bool 
rob_alloc_compressed; + + // output + double *StutterEfficiencyNotIncludingVBlank; + double *StutterEfficiency; + unsigned int *NumberOfStutterBurstsPerFrame; + double *Z8StutterEfficiencyNotIncludingVBlank; + double *Z8StutterEfficiency; + unsigned int *Z8NumberOfStutterBurstsPerFrame; + double *StutterPeriod; + bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; +}; + +struct dml2_core_calcs_CalculatePrefetchSchedule_params { + const struct dml2_display_cfg *display_cfg; + double HostVMInefficiencyFactor; + struct dml2_core_internal_DmlPipe *myPipe; + unsigned int DSCDelay; + double DPPCLKDelaySubtotalPlusCNVCFormater; + double DPPCLKDelaySCL; + double DPPCLKDelaySCLLBOnly; + double DPPCLKDelayCNVCCursor; + double DISPCLKDelaySubtotal; + unsigned int DPP_RECOUT_WIDTH; + enum dml2_output_format_class OutputFormat; + unsigned int MaxInterDCNTileRepeaters; + unsigned int VStartup; + unsigned int MaxVStartup; + unsigned int HostVMMinPageSize; + bool DynamicMetadataEnable; + bool DynamicMetadataVMEnabled; + unsigned int DynamicMetadataLinesBeforeActiveRequired; + unsigned int DynamicMetadataTransmittedBytes; + double UrgentLatency; + double ExtraLatencyPrefetch; + double TCalc; + unsigned int vm_bytes; + unsigned int PixelPTEBytesPerRow; + double PrefetchSourceLinesY; + unsigned int VInitPreFillY; + unsigned int MaxNumSwathY; + double PrefetchSourceLinesC; + unsigned int VInitPreFillC; + unsigned int MaxNumSwathC; + unsigned int swath_width_luma_ub; + unsigned int swath_width_chroma_ub; + unsigned int SwathHeightY; + unsigned int SwathHeightC; + double TWait; + double Ttrip; + double Turg; + bool setup_for_tdlut; + unsigned int tdlut_pte_bytes_per_frame; + unsigned int tdlut_bytes_per_frame; + double tdlut_opt_time; + double tdlut_drain_time; + + unsigned int num_cursors; + unsigned int cursor_bytes_per_chunk; + unsigned int cursor_bytes_per_line; + + // MRQ + bool dcc_enable; + bool mrq_present; + unsigned int meta_row_bytes; + double mall_prefetch_sdp_overhead_factor; + + // output + unsigned int *DSTXAfterScaler; + unsigned int *DSTYAfterScaler; + double *dst_y_prefetch; + double *dst_y_per_vm_vblank; + double *dst_y_per_row_vblank; + double *VRatioPrefetchY; + double *VRatioPrefetchC; + double *RequiredPrefetchPixelDataBWLuma; + double *RequiredPrefetchPixelDataBWChroma; + bool *NotEnoughTimeForDynamicMetadata; + double *Tno_bw; + double *Tno_bw_flip; + double *prefetch_vmrow_bw; + double *Tdmdl_vm; + double *Tdmdl; + double *TSetup; + double *Tvm_trips; + double *Tr0_trips; + double *Tvm_trips_flip; + double *Tr0_trips_flip; + double *Tvm_trips_flip_rounded; + double *Tr0_trips_flip_rounded; + unsigned int *VUpdateOffsetPix; + unsigned int *VUpdateWidthPix; + unsigned int *VReadyOffsetPix; + double *prefetch_cursor_bw; +}; + +struct dml2_core_calcs_calculate_mcache_row_bytes_params { + unsigned int num_chans; + unsigned int mem_word_bytes; + unsigned int mcache_size_bytes; + unsigned int mcache_line_size_bytes; + unsigned int gpuvm_enable; + unsigned int gpuvm_page_size_kbytes; + + //enum dml_rotation_angle rotation_angle; + bool surf_vert; + unsigned int vp_stationary; + unsigned int tiling_mode; + bool imall_enable; + + unsigned int vp_start_x; + unsigned int vp_start_y; + unsigned int full_vp_width; + unsigned int full_vp_height; + unsigned int blk_width; + unsigned int blk_height; + unsigned int vmpg_width; + unsigned int vmpg_height; + unsigned int full_swath_bytes; + unsigned int bytes_per_pixel; + + // output + unsigned int *num_mcaches; + unsigned int *mcache_row_bytes; + unsigned int 
*meta_row_width_ub; + double *dcc_dram_bw_nom_overhead_factor; + double *dcc_dram_bw_pref_overhead_factor; + unsigned int *mvmpg_width; + unsigned int *mvmpg_height; + unsigned int *full_vp_access_width_mvmpg_aligned; + unsigned int *mvmpg_per_mcache_lb; +}; + +struct dml2_core_shared_calculate_mcache_setting_locals { + struct dml2_core_calcs_calculate_mcache_row_bytes_params l_p; + struct dml2_core_calcs_calculate_mcache_row_bytes_params c_p; + + bool is_dual_plane; + unsigned int mvmpg_width_l; + unsigned int mvmpg_height_l; + unsigned int full_vp_access_width_mvmpg_aligned_l; + unsigned int mvmpg_per_mcache_lb_l; + unsigned int meta_row_width_l; + + unsigned int mvmpg_width_c; + unsigned int mvmpg_height_c; + unsigned int full_vp_access_width_mvmpg_aligned_c; + unsigned int mvmpg_per_mcache_lb_c; + unsigned int meta_row_width_c; + + unsigned int lc_comb_last_mcache_size; + double luma_time_factor; + double mcache_remainder_l; + double mcache_remainder_c; + unsigned int mvmpg_access_width_l; + unsigned int mvmpg_access_width_c; + unsigned int avg_mcache_element_size_l; + unsigned int avg_mcache_element_size_c; + + unsigned int full_vp_access_width_l; + unsigned int full_vp_access_width_c; +}; + +struct dml2_core_calcs_calculate_mcache_setting_params { + bool dcc_enable; + unsigned int num_chans; + unsigned int mem_word_bytes; + unsigned int mcache_size_bytes; + unsigned int mcache_line_size_bytes; + unsigned int gpuvm_enable; + unsigned int gpuvm_page_size_kbytes; + + enum dml2_source_format_class source_format; + bool surf_vert; + unsigned int vp_stationary; + unsigned int tiling_mode; + bool imall_enable; + + unsigned int vp_start_x_l; + unsigned int vp_start_y_l; + unsigned int full_vp_width_l; + unsigned int full_vp_height_l; + unsigned int blk_width_l; + unsigned int blk_height_l; + unsigned int vmpg_width_l; + unsigned int vmpg_height_l; + unsigned int full_swath_bytes_l; + unsigned int bytes_per_pixel_l; + + unsigned int vp_start_x_c; + unsigned int vp_start_y_c; + unsigned int full_vp_width_c; + unsigned int full_vp_height_c; + unsigned int blk_width_c; + unsigned int blk_height_c; + unsigned int vmpg_width_c; + unsigned int vmpg_height_c; + unsigned int full_swath_bytes_c; + unsigned int bytes_per_pixel_c; + + // output + unsigned int *num_mcaches_l; + unsigned int *mcache_row_bytes_l; + unsigned int *mcache_offsets_l; + unsigned int *mcache_shift_granularity_l; + double *dcc_dram_bw_nom_overhead_factor_l; + double *dcc_dram_bw_pref_overhead_factor_l; + + unsigned int *num_mcaches_c; + unsigned int *mcache_row_bytes_c; + unsigned int *mcache_offsets_c; + unsigned int *mcache_shift_granularity_c; + double *dcc_dram_bw_nom_overhead_factor_c; + double *dcc_dram_bw_pref_overhead_factor_c; + + bool *mall_comb_mcache_l; + bool *mall_comb_mcache_c; + bool *lc_comb_mcache; +}; + +struct dml2_core_calcs_calculate_tdlut_setting_params { + // input params + double dispclk_mhz; + bool setup_for_tdlut; + enum dml2_tdlut_width_mode tdlut_width_mode; + enum dml2_tdlut_addressing_mode tdlut_addressing_mode; + unsigned int cursor_buffer_size; + bool gpuvm_enable; + unsigned int gpuvm_page_size_kbytes; + bool is_gfx11; + bool tdlut_mpc_width_flag; + + // output param + unsigned int *tdlut_pte_bytes_per_frame; + unsigned int *tdlut_bytes_per_frame; + unsigned int *tdlut_groups_per_2row_ub; + double *tdlut_opt_time; + double *tdlut_drain_time; + unsigned int *tdlut_bytes_per_group; +}; + +// A list of overridable function pointers in the core +// shared calculation library. 
+struct dml2_core_shared_calculation_funcs { + void (*calculate_det_buffer_size)(struct dml2_core_shared_calculate_det_buffer_size_params *p); +}; + +struct dml2_core_internal_scratch { + // Scratch space for function locals + struct dml2_core_calcs_mode_support_locals dml_core_mode_support_locals; + struct dml2_core_calcs_mode_programming_locals dml_core_mode_programming_locals; + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; + struct dml2_core_calcs_CalculateVMRowAndSwath_locals CalculateVMRowAndSwath_locals; + struct dml2_core_calcs_CalculatePrefetchSchedule_locals CalculatePrefetchSchedule_locals; + struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals CalculateSwathAndDETConfiguration_locals; + struct dml2_core_shared_TruncToValidBPP_locals TruncToValidBPP_locals; + struct dml2_core_shared_CalculateDETBufferSize_locals CalculateDETBufferSize_locals; + struct dml2_core_shared_get_urgent_bandwidth_required_locals get_urgent_bandwidth_required_locals; + struct dml2_core_shared_calculate_peak_bandwidth_required_locals calculate_peak_bandwidth_required_locals; + struct dml2_core_shared_CalculateFlipSchedule_locals CalculateFlipSchedule_locals; + struct dml2_core_shared_rq_dlg_get_dlg_reg_locals rq_dlg_get_dlg_reg_locals; + struct dml2_core_calcs_CalculateStutterEfficiency_locals CalculateStutterEfficiency_locals; + + // Scratch space for function params + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + struct dml2_core_calcs_CalculateVMRowAndSwath_params CalculateVMRowAndSwath_params; + struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params CalculateSwathAndDETConfiguration_params; + struct dml2_core_calcs_CalculateStutterEfficiency_params CalculateStutterEfficiency_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params CalculatePrefetchSchedule_params; + struct dml2_core_calcs_calculate_mcache_setting_params calculate_mcache_setting_params; + struct dml2_core_calcs_calculate_tdlut_setting_params calculate_tdlut_setting_params; + struct dml2_core_shared_calculate_vm_and_row_bytes_params calculate_vm_and_row_bytes_params; + struct dml2_core_shared_calculate_mcache_setting_locals calculate_mcache_setting_locals; + struct dml2_core_shared_CalculateMetaAndPTETimes_params CalculateMetaAndPTETimes_params; +}; + +//struct dml2_svp_mode_override; +struct dml2_core_internal_display_mode_lib { + struct dml2_core_ip_params ip; + struct dml2_soc_bb soc; + + //@brief Mode Support and Mode programming struct + // Used to hold input; intermediate and output of the calculations + struct dml2_core_internal_mode_support ms; // struct for mode support + struct dml2_core_internal_mode_program mp; // struct for mode programming + + // Available overridable calculators for core_shared. + // if null, core_shared will use default calculators. 
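A minimal sketch of how such an override could be installed by an integrator, assuming a hypothetical helper my_calculate_det_buffer_size and a mode_lib pointer owned by the caller; leaving funcs.calculate_det_buffer_size NULL keeps the default core_shared calculator, as noted in the comment above:

	static void my_calculate_det_buffer_size(struct dml2_core_shared_calculate_det_buffer_size_params *p)
	{
		/* hypothetical policy: fill p->DETBufferSizeInKByte and p->CompressedBufferSizeInkByte */
	}

	mode_lib->funcs.calculate_det_buffer_size = my_calculate_det_buffer_size;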
+ struct dml2_core_shared_calculation_funcs funcs; + + struct dml2_core_internal_scratch scratch; +}; + +struct dml2_core_calcs_mode_support_ex { + struct dml2_core_internal_display_mode_lib *mode_lib; + const struct dml2_display_cfg *in_display_cfg; + const struct dml2_mcg_min_clock_table *min_clk_table; + int min_clk_index; + + //unsigned int in_state_index; + struct dml2_core_internal_mode_support_info *out_evaluation_info; +}; + +struct core_display_cfg_support_info; + +struct dml2_core_calcs_mode_programming_ex { + struct dml2_core_internal_display_mode_lib *mode_lib; + const struct dml2_display_cfg *in_display_cfg; + const struct dml2_mcg_min_clock_table *min_clk_table; + const struct core_display_cfg_support_info *cfg_support_info; + int min_clk_index; + + struct dml2_display_cfg_programming *programming; + +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c new file mode 100644 index 000000000000..e6698ee65843 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -0,0 +1,644 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_dpmm_dcn4.h" +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" +#include "lib_float_math.h" + +static double dram_bw_kbps_to_uclk_khz(unsigned long long bandwidth_kbps, const struct dml2_dram_params *dram_config) +{ + double uclk_khz = 0; + unsigned long uclk_mbytes_per_tick = 0; + + uclk_mbytes_per_tick = dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock; + + uclk_khz = (double)bandwidth_kbps / uclk_mbytes_per_tick; + + return uclk_khz; +} + +static void get_minimum_clocks_for_latency(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, + double *uclk, + double *fclk, + double *dcfclk) +{ + int min_clock_index_for_latency; + + if (in_out->display_cfg->stage3.success) + min_clock_index_for_latency = in_out->display_cfg->stage3.min_clk_index_for_latency; + else + min_clock_index_for_latency = in_out->display_cfg->stage1.min_clk_index_for_latency; + + *dcfclk = in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].min_dcfclk_khz; + *fclk = in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].min_fclk_khz; + *uclk = dram_bw_kbps_to_uclk_khz(in_out->min_clk_table->dram_bw_table.entries[min_clock_index_for_latency].pre_derate_dram_bw_kbps, + &in_out->soc_bb->clk_table.dram_config); +} + +static unsigned long dml_round_up(double a) +{ + if (a - (unsigned long)a > 0) { + return ((unsigned long)a) + 1; + } + return (unsigned long)a; +} + +static void calculate_system_active_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + double min_uclk_avg, min_uclk_urgent, min_uclk_bw; + double min_fclk_avg, min_fclk_urgent, min_fclk_bw; + double min_dcfclk_avg, min_dcfclk_urgent, min_dcfclk_bw; + double min_uclk_latency, min_fclk_latency, min_dcfclk_latency; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100); + + min_uclk_urgent = 
dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100); + + min_uclk_bw = min_uclk_urgent > min_uclk_avg ? min_uclk_urgent : min_uclk_avg; + + min_fclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100); + + min_fclk_urgent = (double)mode_support_result->global.active.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100); + + min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg; + + min_dcfclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100); + + min_dcfclk_urgent = (double)mode_support_result->global.active.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100); + + min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); +} + +static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + double min_uclk_avg, min_uclk_urgent, min_uclk_bw; + double min_fclk_avg, min_fclk_urgent, min_fclk_bw; + double min_dcfclk_avg, min_dcfclk_urgent, min_dcfclk_bw; + double min_fclk_latency, min_dcfclk_latency; + double min_uclk_latency; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100); + + min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100); + + min_uclk_bw = min_uclk_urgent > min_uclk_avg ? 
min_uclk_urgent : min_uclk_avg; + + min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100); + + min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100); + + min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg; + + min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100); + + min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100); + + min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg; + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); +} + +static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + double min_uclk_avg; + double min_fclk_avg; + double min_dcfclk_avg; + double min_uclk_latency, min_fclk_latency, min_dcfclk_latency; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.dram_derate_percent_pixel / 100); + + min_fclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.fclk_derate_percent / 100); + + min_dcfclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.dcfclk_derate_percent / 100); + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); + in_out->programming->min_clocks.dcn4.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? 
min_fclk_avg : min_fclk_latency); + in_out->programming->min_clocks.dcn4.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? min_dcfclk_avg : min_dcfclk_latency); +} + +static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double margin, unsigned long vco_freq_khz, unsigned long *rounded_khz, uint32_t *divider_id) +{ + enum dentist_divider_range { + DFS_DIVIDER_RANGE_1_START = 8, /* 2.00 */ + DFS_DIVIDER_RANGE_1_STEP = 1, /* 0.25 */ + DFS_DIVIDER_RANGE_2_START = 64, /* 16.00 */ + DFS_DIVIDER_RANGE_2_STEP = 2, /* 0.50 */ + DFS_DIVIDER_RANGE_3_START = 128, /* 32.00 */ + DFS_DIVIDER_RANGE_3_STEP = 4, /* 1.00 */ + DFS_DIVIDER_RANGE_4_START = 248, /* 62.00 */ + DFS_DIVIDER_RANGE_4_STEP = 264, /* 66.00 */ + DFS_DIVIDER_RANGE_SCALE_FACTOR = 4 + }; + + enum DFS_base_divider_id { + DFS_BASE_DID_1 = 0x08, + DFS_BASE_DID_2 = 0x40, + DFS_BASE_DID_3 = 0x60, + DFS_BASE_DID_4 = 0x7e, + DFS_MAX_DID = 0x7f + }; + + unsigned int divider; + + if (clock_khz < 1 || vco_freq_khz < 1 || clock_khz > vco_freq_khz) + return false; + + clock_khz *= 1.0 + margin; + + divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); + + /* we want to floor here to get higher clock than required rather than lower */ + if (divider < DFS_DIVIDER_RANGE_2_START) { + if (divider < DFS_DIVIDER_RANGE_1_START) + *divider_id = DFS_BASE_DID_1; + else + *divider_id = DFS_BASE_DID_1 + ((divider - DFS_DIVIDER_RANGE_1_START) / DFS_DIVIDER_RANGE_1_STEP); + } else if (divider < DFS_DIVIDER_RANGE_3_START) { + *divider_id = DFS_BASE_DID_2 + ((divider - DFS_DIVIDER_RANGE_2_START) / DFS_DIVIDER_RANGE_2_STEP); + } else if (divider < DFS_DIVIDER_RANGE_4_START) { + *divider_id = DFS_BASE_DID_3 + ((divider - DFS_DIVIDER_RANGE_3_START) / DFS_DIVIDER_RANGE_3_STEP); + } else { + *divider_id = DFS_BASE_DID_4 + ((divider - DFS_DIVIDER_RANGE_4_START) / DFS_DIVIDER_RANGE_4_STEP); + if (*divider_id > DFS_MAX_DID) + *divider_id = DFS_MAX_DID; + } + + *rounded_khz = vco_freq_khz * DFS_DIVIDER_RANGE_SCALE_FACTOR / divider; + + return true; +} + +static bool round_up_and_copy_to_next_dpm(unsigned long min_value, unsigned long *rounded_value, const struct dml2_clk_table *clock_table) +{ + bool result = false; + int index = 0; + + if (clock_table->num_clk_values > 2) { + while (index < clock_table->num_clk_values && clock_table->clk_values_khz[index] < min_value) + index++; + + if (index < clock_table->num_clk_values) { + *rounded_value = clock_table->clk_values_khz[index]; + result = true; + } + } else if (clock_table->clk_values_khz[clock_table->num_clk_values - 1] >= min_value) { + *rounded_value = min_value; + result = true; + } + return result; +} + +static bool round_up_to_next_dpm(unsigned long *clock_value, const struct dml2_clk_table *clock_table) +{ + return round_up_and_copy_to_next_dpm(*clock_value, clock_value, clock_table); +} + +static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mode_support_result, struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table) +{ + bool result; + unsigned int i; + + if (!state_table || !display_cfg) + return false; + + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.dcfclk_khz, &state_table->dcfclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.fclk_khz, &state_table->fclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.uclk_khz, &state_table->uclk); + + if (result) + result = 
round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.dcfclk_khz, &state_table->dcfclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.fclk_khz, &state_table->fclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.uclk_khz, &state_table->uclk); + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.dcfclk_khz, &state_table->dcfclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.fclk_khz, &state_table->fclk); + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.uclk_khz, &state_table->uclk); + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dispclk_khz, &state_table->dispclk); + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.deepsleep_dcfclk_khz, &state_table->dcfclk); + + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + if (result) + result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4.dppclk_khz, &state_table->dppclk); + } + + for (i = 0; i < display_cfg->display_config.num_streams; i++) { + if (result) + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.dscclk_khz, &state_table->dscclk); + if (result) + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.dtbclk_khz, &state_table->dtbclk); + if (result) + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.phyclk_khz, &state_table->phyclk); + } + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dpprefclk_khz, &state_table->dppclk); + + if (result) + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dtbrefclk_khz, &state_table->dtbclk); + + return result; +} + +static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *display_config, int mask) +{ + unsigned int i; + bool identical = true; + bool contains_drr = false; + unsigned int remap_array[DML2_MAX_PLANES]; + unsigned int remap_array_size = 0; + + // Create a remap array to enable simple iteration through only masked stream indices + for (i = 0; i < display_config->num_streams; i++) { + if (mask & (0x1 << i)) { + remap_array[remap_array_size++] = i; + } + } + + // 0 or 1 display is always trivially synchronizable + if (remap_array_size <= 1) + return true; + + // Check that all display timings are the same + for (i = 1; i < remap_array_size; i++) { + if (memcmp(&display_config->stream_descriptors[remap_array[i - 1]].timing, &display_config->stream_descriptors[remap_array[i]].timing, sizeof(struct dml2_timing_cfg))) { + identical = false; + break; + } + } + + // Check if any displays are drr + for (i = 0; i < remap_array_size; i++) { + if (display_config->stream_descriptors[remap_array[i]].timing.drr_config.enabled) { + contains_drr = true; + break; + } + } + + // Trivial sync is possible if all displays are identical and none are DRR + return !contains_drr && identical; +} + +static int find_smallest_idle_time_in_vblank_us(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int mask) +{ + unsigned int i; + int min_idle_us = 0; + unsigned int remap_array[DML2_MAX_PLANES]; + unsigned int remap_array_size = 0; + const struct dml2_core_mode_support_result *mode_support_result = 
&in_out->display_cfg->mode_support_result; + + // Create a remap array to enable simple iteration through only masked stream indices + for (i = 0; i < in_out->programming->display_config.num_streams; i++) { + if (mask & (0x1 << i)) { + remap_array[remap_array_size++] = i; + } + } + + if (remap_array_size == 0) + return 0; + + min_idle_us = mode_support_result->cfg_support_info.stream_support_info[remap_array[0]].vblank_reserved_time_us; + + for (i = 1; i < remap_array_size; i++) { + if (min_idle_us > mode_support_result->cfg_support_info.stream_support_info[remap_array[i]].vblank_reserved_time_us) + min_idle_us = mode_support_result->cfg_support_info.stream_support_info[remap_array[i]].vblank_reserved_time_us; + } + + return min_idle_us; +} + +static bool determine_power_management_features_with_vblank_only(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + int min_idle_us; + + if (are_timings_trivially_synchronizable(&in_out->programming->display_config, 0xF)) { + min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, 0xF); + + if (min_idle_us >= in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->uclk_pstate_supported = true; + + if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us) + in_out->programming->fclk_pstate_supported = true; + } + + return true; +} + +static int get_displays_without_vactive_margin_mask(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int latency_hiding_requirement_us) +{ + unsigned int i; + int displays_without_vactive_margin_mask = 0x0; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + for (i = 0; i < in_out->programming->display_config.num_planes; i++) { + if (mode_support_result->cfg_support_info.plane_support_info[i].active_latency_hiding_us + < latency_hiding_requirement_us) + displays_without_vactive_margin_mask |= (0x1 << i); + } + + return displays_without_vactive_margin_mask; +} + +static int get_displays_with_fams_mask(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int latency_hiding_requirement_us) +{ + unsigned int i; + int displays_with_fams_mask = 0x0; + + for (i = 0; i < in_out->programming->display_config.num_planes; i++) { + if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config != dml2_svp_mode_override_auto) + displays_with_fams_mask |= (0x1 << i); + } + + return displays_with_fams_mask; +} + +static bool determine_power_management_features_with_vactive_and_vblank(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + int displays_without_vactive_margin_mask = 0x0; + int min_idle_us = 0; + + if (in_out->programming->uclk_pstate_supported == false) { + displays_without_vactive_margin_mask = + get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)); + + if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) { + min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask); + + if (min_idle_us >= in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->uclk_pstate_supported = true; + } + } + + if (in_out->programming->fclk_pstate_supported == false) { + displays_without_vactive_margin_mask = + get_displays_without_vactive_margin_mask(in_out, 
(int)(in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)); + + if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) { + min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask); + + if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us) + in_out->programming->fclk_pstate_supported = true; + } + } + + return true; +} + +static bool determine_power_management_features_with_fams(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + int displays_without_vactive_margin_mask = 0x0; + int displays_without_fams_mask = 0x0; + + displays_without_vactive_margin_mask = + get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)); + + displays_without_fams_mask = + get_displays_with_fams_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)); + + if ((displays_without_vactive_margin_mask & ~displays_without_fams_mask) == 0) + in_out->programming->uclk_pstate_supported = true; + + return true; +} + +static void clamp_uclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + in_out->programming->min_clocks.dcn4.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; +} + +static void clamp_fclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + in_out->programming->min_clocks.dcn4.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; +} + +static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + int i; + bool result; + double dispclk_khz; + const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + + calculate_system_active_minimums(in_out); + calculate_svp_prefetch_minimums(in_out); + calculate_idle_minimums(in_out); + + // In DCN4, there's no support for FCLK or DCFCLK DPM change before SVP prefetch starts, therefore + // active minimums must be boosted to prefetch minimums + if (in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4.active.uclk_khz) + in_out->programming->min_clocks.dcn4.active.uclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz; + + if (in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4.active.fclk_khz) + in_out->programming->min_clocks.dcn4.active.fclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz; + + if (in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4.active.dcfclk_khz) + in_out->programming->min_clocks.dcn4.active.dcfclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz; + + // need some massaging for the dispclk ramping cases: + dispclk_khz = mode_support_result->global.dispclk_khz * (1 + 
in_out->soc_bb->dcn_downspread_percent / 100.0) * (1.0 + in_out->ip->dispclk_ramp_margin_percent / 100.0); + // ramping margin should not make dispclk exceed the maximum dispclk speed: + dispclk_khz = math_min2(dispclk_khz, in_out->min_clk_table->max_clocks_khz.dispclk); + // but still the required dispclk can be more than the maximum dispclk speed: + dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); + + add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dispclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dispclk_did); + + // DPP Ref is always set to max of all DPP clocks + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + if (in_out->programming->min_clocks.dcn4.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) + in_out->programming->min_clocks.dcn4.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; + } + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4.dpprefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dpprefclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dpprefclk_did); + + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + in_out->programming->plane_programming[i].min_clocks.dcn4.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4.dpprefclk_khz / 255.0 + * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4.dpprefclk_khz, 1.0)); + } + + // DTB Ref is always set to max of all DTB clocks + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + if (in_out->programming->min_clocks.dcn4.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) + in_out->programming->min_clocks.dcn4.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; + } + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4.dtbrefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dtbrefclk_did); + + in_out->programming->min_clocks.dcn4.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; + in_out->programming->min_clocks.dcn4.socclk_khz = mode_support_result->global.socclk_khz; + + result = map_min_clocks_to_dpm(mode_support_result, in_out->programming, &in_out->soc_bb->clk_table); + + // By default, all power management features are not enabled + in_out->programming->fclk_pstate_supported = false; + in_out->programming->uclk_pstate_supported = false; + + return result; +} + +bool dpmm_dcn3_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + bool result; + + result = map_mode_to_soc_dpm(in_out); + + // Check if any can be enabled by nominal vblank idle time + determine_power_management_features_with_vblank_only(in_out); + + // Check if any can be enabled in vactive/vblank + determine_power_management_features_with_vactive_and_vblank(in_out); + + // Check if any can be enabled via fams + determine_power_management_features_with_fams(in_out); + + if (in_out->programming->uclk_pstate_supported == false) + clamp_uclk_to_max(in_out); + + if 
(in_out->programming->fclk_pstate_supported == false) + clamp_fclk_to_max(in_out); + + return result; +} + +bool dpmm_dcn4_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + bool result; + int displays_without_vactive_margin_mask = 0x0; + int min_idle_us = 0; + + result = map_mode_to_soc_dpm(in_out); + + if (in_out->display_cfg->stage3.success) + in_out->programming->uclk_pstate_supported = true; + + displays_without_vactive_margin_mask = + get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)); + + if (displays_without_vactive_margin_mask == 0) { + in_out->programming->fclk_pstate_supported = true; + } else { + if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) { + min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask); + + if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us) + in_out->programming->fclk_pstate_supported = true; + } + } + + if (in_out->programming->uclk_pstate_supported == false) + clamp_uclk_to_max(in_out); + + if (in_out->programming->fclk_pstate_supported == false) + clamp_fclk_to_max(in_out); + + min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, 0xFF); + if (in_out->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && + min_idle_us >= in_out->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) + in_out->programming->stutter.supported_in_blank = true; + else + in_out->programming->stutter.supported_in_blank = false; + + // TODO: Fix me Sam + if (in_out->soc_bb->power_management_parameters.z8_min_idle_time > 0 && + in_out->programming->informative.power_management.z8.stutter_period >= in_out->soc_bb->power_management_parameters.z8_min_idle_time) + in_out->programming->z8_stutter.meets_eco = true; + else + in_out->programming->z8_stutter.meets_eco = false; + + if (in_out->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && + min_idle_us >= in_out->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) + in_out->programming->z8_stutter.supported_in_blank = true; + else + in_out->programming->z8_stutter.supported_in_blank = false; + + return result; +} + +bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out) +{ + const struct dml2_display_cfg *display_cfg = &in_out->display_cfg->display_config; + const struct dml2_core_internal_display_mode_lib *mode_lib = &in_out->core->clean_me_up.mode_lib; + struct dml2_dchub_global_register_set *dchubbub_regs = &in_out->programming->global_regs; + + double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? 
(double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + + /* set A */ + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000); + + /* set B */ + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000); + 
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000); + + dchubbub_regs->num_watermark_sets = 2; + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h new file mode 100644 index 000000000000..3afb69dfd040 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML2_DPMM_DCN4_H__ +#define __DML2_DPMM_DCN4_H__ + +#include "dml2_internal_shared_types.h" + +bool dpmm_dcn3_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); +bool dpmm_dcn4_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); +bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out); + +bool dpmm_dcn4_unit_test(void); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c new file mode 100644 index 000000000000..0f67cf67e4db --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_dpmm_factory.h" +#include "dml2_dpmm_dcn4.h" +#include "dml2_external_lib_deps.h" + +static bool dummy_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) +{ + return true; +} + +static bool dummy_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out) +{ + return true; +} + +bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance *out) +{ + bool result = false; + + if (out == 0) + return false; + + memset(out, 0, sizeof(struct dml2_dpmm_instance)); + + switch (project_id) { + case dml2_project_dcn4x_stage1: + out->map_mode_to_soc_dpm = &dummy_map_mode_to_soc_dpm; + out->map_watermarks = &dummy_map_watermarks; + result = true; + break; + case dml2_project_dcn4x_stage2: + out->map_mode_to_soc_dpm = &dpmm_dcn3_map_mode_to_soc_dpm; + out->map_watermarks = &dummy_map_watermarks; + result = true; + break; + case dml2_project_dcn4x_stage2_auto_drr_svp: + out->map_mode_to_soc_dpm = &dpmm_dcn4_map_mode_to_soc_dpm; + result = true; + break; + case dml2_project_invalid: + default: + break; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h new file mode 100644 index 000000000000..80b44b4c2e68 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
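For context on the register values written by dpmm_dcn4_map_watermarks() above: the DML watermarks are computed in microseconds, and DCHUB counts them in refclk cycles, so each value is multiplied by the refclk frequency in MHz (1 MHz is one cycle per microsecond), taken from the dlg_ref_clk_mhz override when set or the SoC dchub_refclk_mhz otherwise; the frac_urg_bw_* fields are fractions scaled by 1000. The stand-alone sketch below only illustrates that arithmetic; the numeric inputs are invented for the example and are not taken from this patch.

#include <stdio.h>

int main(void)
{
	/* Example inputs (assumed, not from the patch) */
	double refclk_mhz = 50.0;		/* dlg_ref_clk_mhz override, or soc dchub_refclk_mhz */
	double dram_change_wm_us = 250.0;	/* e.g. mode_lib->mp.Watermark.DRAMClockChangeWatermark */
	double frac_urg_bw = 0.75;		/* e.g. mode_lib->mp.FractionOfUrgentBandwidth */

	/* microseconds * (cycles per microsecond) -> refclk cycles, as done per wm_regs field */
	unsigned int uclk_pstate_cycles = (unsigned int)(dram_change_wm_us * refclk_mhz);

	/* fraction scaled by 1000 -> fixed-point register value */
	unsigned int frac_urg_bw_nom = (unsigned int)(frac_urg_bw * 1000);

	printf("uclk_pstate = %u refclk cycles, frac_urg_bw_nom = %u\n",
	       uclk_pstate_cycles, frac_urg_bw_nom);
	return 0;
}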
+ + +#ifndef __DML2_DPMM_FACTORY_H__ +#define __DML2_DPMM_FACTORY_H__ + +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" + +bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance *out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c new file mode 100644 index 000000000000..f544f8c460c8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_mcg_dcn4.h" +#include "dml_top_soc_parameter_types.h" + +static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table); + +bool mcg_dcn4_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out) +{ + return build_min_clock_table(in_out->soc_bb, in_out->min_clk_table); +} + +static unsigned long long uclk_to_dram_bw_kbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config) +{ + unsigned long long bw_kbps = 0; + + bw_kbps = (unsigned long long) uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock; + + return bw_kbps; +} + +static unsigned long round_up_to_quantized_values(unsigned long value, const unsigned long *quantized_values, int num_quantized_values) +{ + int i; + + if (!quantized_values) + return 0; + + for (i = 0; i < num_quantized_values; i++) { + if (quantized_values[i] > value) + return quantized_values[i]; + } + + return 0; +} + +static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table) +{ + int i; + unsigned int j; + + bool dcfclk_fine_grained = false, fclk_fine_grained = false; + unsigned long min_dcfclk_khz = 0, max_dcfclk_khz = 0; + unsigned long min_fclk_khz = 0, max_fclk_khz = 0; + unsigned long prev_100, cur_50; + + if (!soc_bb || !min_table) + return false; + + if (soc_bb->clk_table.dcfclk.num_clk_values < 2 || soc_bb->clk_table.fclk.num_clk_values < 2) + return false; + + if (soc_bb->clk_table.uclk.num_clk_values > DML_MCG_MAX_CLK_TABLE_SIZE) + return false; + + min_table->fixed_clocks_khz.amclk = 0; + min_table->fixed_clocks_khz.dprefclk = soc_bb->dprefclk_mhz * 1000; + min_table->fixed_clocks_khz.pcierefclk = soc_bb->pcie_refclk_mhz * 1000; + min_table->fixed_clocks_khz.dchubrefclk = soc_bb->dchub_refclk_mhz * 1000; + min_table->fixed_clocks_khz.xtalclk = soc_bb->xtalclk_mhz * 1000; + + if (soc_bb->clk_table.dcfclk.num_clk_values == 2) { + dcfclk_fine_grained = true; + } + max_dcfclk_khz = soc_bb->clk_table.dcfclk.clk_values_khz[soc_bb->clk_table.dcfclk.num_clk_values - 1]; + min_dcfclk_khz = soc_bb->clk_table.dcfclk.clk_values_khz[0]; + + if (soc_bb->clk_table.fclk.num_clk_values == 2) { + fclk_fine_grained = true; + } + max_fclk_khz = soc_bb->clk_table.fclk.clk_values_khz[soc_bb->clk_table.fclk.num_clk_values - 1]; + min_fclk_khz = soc_bb->clk_table.fclk.clk_values_khz[0]; + + min_table->max_clocks_khz.dispclk = soc_bb->clk_table.dispclk.clk_values_khz[soc_bb->clk_table.dispclk.num_clk_values - 1]; + min_table->max_clocks_khz.dppclk = soc_bb->clk_table.dppclk.clk_values_khz[soc_bb->clk_table.dppclk.num_clk_values - 1]; + min_table->max_clocks_khz.dscclk = soc_bb->clk_table.dscclk.clk_values_khz[soc_bb->clk_table.dscclk.num_clk_values - 1]; + min_table->max_clocks_khz.dtbclk = 
soc_bb->clk_table.dtbclk.clk_values_khz[soc_bb->clk_table.dtbclk.num_clk_values - 1]; + min_table->max_clocks_khz.phyclk = soc_bb->clk_table.phyclk.clk_values_khz[soc_bb->clk_table.phyclk.num_clk_values - 1]; + + min_table->max_clocks_khz.dcfclk = max_dcfclk_khz; + min_table->max_clocks_khz.fclk = max_fclk_khz; + + // First calculate the table for "balanced" bandwidths across UCLK/FCLK + for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) { + min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps = uclk_to_dram_bw_kbps(soc_bb->clk_table.uclk.clk_values_khz[i], &soc_bb->clk_table.dram_config); + + min_table->dram_bw_table.entries[i].min_fclk_khz = (unsigned long)((((double)min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps * soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100) / ((double)soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100)) / soc_bb->fabric_datapath_to_dcn_data_return_bytes); + } + min_table->dram_bw_table.num_entries = soc_bb->clk_table.uclk.num_clk_values; + + // To create the minium table, effectively shift "up" all the dcfclk/fclk entries by 1, and then replace the lowest entry with min fclk/dcfclk + for (i = min_table->dram_bw_table.num_entries - 1; i > 0; i--) { + prev_100 = min_table->dram_bw_table.entries[i - 1].min_fclk_khz; + cur_50 = min_table->dram_bw_table.entries[i].min_fclk_khz / 2; + min_table->dram_bw_table.entries[i].min_fclk_khz = prev_100 > cur_50 ? prev_100 : cur_50; + + if (!fclk_fine_grained) { + min_table->dram_bw_table.entries[i].min_fclk_khz = round_up_to_quantized_values(min_table->dram_bw_table.entries[i].min_fclk_khz, soc_bb->clk_table.fclk.clk_values_khz, soc_bb->clk_table.fclk.num_clk_values); + } + } + min_table->dram_bw_table.entries[0].min_fclk_khz /= 2; + + // Clamp to minimums and maximums + for (i = 0; i < (int)min_table->dram_bw_table.num_entries; i++) { + if (min_table->dram_bw_table.entries[i].min_dcfclk_khz < min_dcfclk_khz) + min_table->dram_bw_table.entries[i].min_dcfclk_khz = min_dcfclk_khz; + + if (min_table->dram_bw_table.entries[i].min_fclk_khz < min_fclk_khz) + min_table->dram_bw_table.entries[i].min_fclk_khz = min_fclk_khz; + + if (soc_bb->max_fclk_for_uclk_dpm_khz > 0 && + min_table->dram_bw_table.entries[i].min_fclk_khz > soc_bb->max_fclk_for_uclk_dpm_khz) + min_table->dram_bw_table.entries[i].min_fclk_khz = soc_bb->max_fclk_for_uclk_dpm_khz; + + min_table->dram_bw_table.entries[i].min_dcfclk_khz = + min_table->dram_bw_table.entries[i].min_fclk_khz * + soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent; + + min_table->dram_bw_table.entries[i].min_dcfclk_khz = + min_table->dram_bw_table.entries[i].min_dcfclk_khz * soc_bb->fabric_datapath_to_dcn_data_return_bytes / soc_bb->return_bus_width_bytes; + + if (!dcfclk_fine_grained) { + min_table->dram_bw_table.entries[i].min_dcfclk_khz = round_up_to_quantized_values(min_table->dram_bw_table.entries[i].min_dcfclk_khz, soc_bb->clk_table.dcfclk.clk_values_khz, soc_bb->clk_table.dcfclk.num_clk_values); + } + } + + // Prune states which are invalid (some clocks exceed maximum) + for (i = 0; i < (int)min_table->dram_bw_table.num_entries; i++) { + if (min_table->dram_bw_table.entries[i].min_dcfclk_khz > min_table->max_clocks_khz.dcfclk || + min_table->dram_bw_table.entries[i].min_fclk_khz > min_table->max_clocks_khz.fclk) { + min_table->dram_bw_table.num_entries = i; + break; + } + } + + // 
Prune duplicate states + for (i = 0; i < (int)min_table->dram_bw_table.num_entries - 1; i++) { + if (min_table->dram_bw_table.entries[i].min_dcfclk_khz == min_table->dram_bw_table.entries[i + 1].min_dcfclk_khz && + min_table->dram_bw_table.entries[i].min_fclk_khz == min_table->dram_bw_table.entries[i + 1].min_fclk_khz && + min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps == min_table->dram_bw_table.entries[i + 1].pre_derate_dram_bw_kbps) { + + // i + 1 is the same state as i, so shift everything + for (j = i + 1; j < min_table->dram_bw_table.num_entries; j++) { + min_table->dram_bw_table.entries[j].min_dcfclk_khz = min_table->dram_bw_table.entries[j + 1].min_dcfclk_khz; + min_table->dram_bw_table.entries[j].min_fclk_khz = min_table->dram_bw_table.entries[j + 1].min_fclk_khz; + min_table->dram_bw_table.entries[j].pre_derate_dram_bw_kbps = min_table->dram_bw_table.entries[j + 1].pre_derate_dram_bw_kbps; + } + min_table->dram_bw_table.num_entries--; + } + } + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h new file mode 100644 index 000000000000..2419d2dd6b3b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML2_MCG_DCN4_H__ +#define __DML2_MCG_DCN4_H__ + +#include "dml2_internal_shared_types.h" + +bool mcg_dcn4_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out); +bool mcg_dcn4_unit_test(void); + +#endif
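To make the arithmetic in build_min_clock_table() above easier to follow: for each UCLK DPM level the pre-derate DRAM bandwidth is uclk * channel_count * channel_width_bytes * transactions_per_clock, the balanced minimum FCLK is that bandwidth multiplied by the DRAM derate, divided by the FCLK derate and by the fabric-to-DCN return width, and, when the clock is not fine-grained, the derived minimum is later rounded up to a discrete DPM level. The sketch below reproduces just that arithmetic with invented SoC numbers; it is illustrative only and not part of the patch.

#include <stdio.h>

/* First quantized level strictly above value, else 0, mirroring round_up_to_quantized_values() */
static unsigned long round_up_to_dpm(unsigned long value, const unsigned long *levels, int n)
{
	for (int i = 0; i < n; i++)
		if (levels[i] > value)
			return levels[i];
	return 0;
}

int main(void)
{
	/* Assumed example SoC parameters (not from the patch) */
	unsigned long uclk_khz = 1000000;	/* one UCLK DPM level: 1 GHz */
	unsigned long channels = 16, width_bytes = 2, xfers_per_clk = 2;
	double dram_derate_pct = 90.0, fclk_derate_pct = 85.0;
	unsigned long fabric_return_bytes = 64;	/* fabric_datapath_to_dcn_data_return_bytes */
	const unsigned long fclk_dpm_khz[] = { 600000, 1200000, 1800000, 2400000 };

	/* pre_derate_dram_bw_kbps = uclk * channels * width * transactions per clock */
	unsigned long long bw_kbps = (unsigned long long)uclk_khz * channels * width_bytes * xfers_per_clk;

	/* min_fclk_khz = (bw * dram_derate / fclk_derate) / fabric return width */
	unsigned long min_fclk_khz = (unsigned long)
		(((double)bw_kbps * dram_derate_pct / 100.0) / (fclk_derate_pct / 100.0)
		 / fabric_return_bytes);

	/* quantize to the next FCLK DPM level when FCLK is not fine-grained */
	min_fclk_khz = round_up_to_dpm(min_fclk_khz, fclk_dpm_khz, 4);

	printf("pre-derate bw = %llu kbps, min fclk = %lu kHz\n", bw_kbps, min_fclk_khz);
	return 0;
}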
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c new file mode 100644 index 000000000000..ce83c10253a2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_mcg_factory.h" +#include "dml2_mcg_dcn4.h" +#include "dml2_external_lib_deps.h" + +static bool dummy_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out) +{ + return true; +} + +bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *out) +{ + bool result = false; + + if (out == 0) + return false; + + memset(out, 0, sizeof(struct dml2_mcg_instance)); + + switch (project_id) { + case dml2_project_dcn4x_stage1: + out->build_min_clock_table = &dummy_build_min_clock_table; + result = true; + break; + case dml2_project_dcn4x_stage2: + case dml2_project_dcn4x_stage2_auto_drr_svp: + out->build_min_clock_table = &mcg_dcn4_build_min_clock_table; + result = true; + break; + case dml2_project_invalid: + default: + break; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h new file mode 100644 index 000000000000..5dfdfed04e22 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML2_MCG_FACTORY_H__ +#define __DML2_MCG_FACTORY_H__ + +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" + +bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c new file mode 100644 index 000000000000..7cedb191140e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -0,0 +1,688 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
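Both dml2_dpmm_create() and dml2_mcg_create() above follow the same factory shape: zero the output instance, then fill its function pointers per project id, installing either a real implementation or a no-op dummy callback, and return false for an invalid project id. The stand-alone sketch below shows only that dispatch pattern; every type and name in it is a simplified stand-in invented for the example, not the real dml2 API.

#include <stdbool.h>
#include <string.h>
#include <stdio.h>

/* Stand-in types; the real ones live in dml2_internal_shared_types.h */
enum project_id { PROJ_INVALID, PROJ_STAGE1, PROJ_STAGE2 };
struct map_params { int unused; };
struct instance { bool (*map_mode_to_soc_dpm)(struct map_params *p); };

static bool dummy_map(struct map_params *p) { (void)p; return true; }	/* stage not implemented */
static bool real_map(struct map_params *p)  { (void)p; return true; }	/* stand-in for a real mapper */

static bool create(enum project_id id, struct instance *out)
{
	if (!out)
		return false;
	memset(out, 0, sizeof(*out));
	switch (id) {
	case PROJ_STAGE1: out->map_mode_to_soc_dpm = dummy_map; return true;
	case PROJ_STAGE2: out->map_mode_to_soc_dpm = real_map;  return true;
	default:          return false;		/* leave the vtable zeroed */
	}
}

int main(void)
{
	struct instance inst;
	struct map_params params = { 0 };

	if (create(PROJ_STAGE2, &inst))
		printf("map ok: %d\n", inst.map_mode_to_soc_dpm(&params));
	return 0;
}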
+ + +#include "dml2_pmo_factory.h" +#include "dml2_pmo_dcn3.h" + +static void sort(double *list_a, int list_a_size) +{ + double temp; + // For all elements b[i] in list_b[] + for (int i = 0; i < list_a_size - 1; i++) { + // Find the first element of list_a that's larger than b[i] + for (int j = i; j < list_a_size - 1; j++) { + if (list_a[j] > list_a[j + 1]) { + temp = list_a[j]; + list_a[j] = list_a[j + 1]; + list_a[j + 1] = temp; + } + } + } +} + +static void set_reserved_time_on_all_planes_with_stream_index(struct display_configuation_with_meta *config, unsigned int stream_index, double reserved_time_us) +{ + struct dml2_plane_parameters *plane_descriptor; + + for (unsigned int i = 0; i < config->display_config.num_planes; i++) { + plane_descriptor = &config->display_config.plane_descriptors[i]; + + if (plane_descriptor->stream_index == stream_index) + plane_descriptor->overrides.reserved_vblank_time_ns = (long int)(reserved_time_us * 1000); + } +} + +static void remove_duplicates(double *list_a, int *list_a_size) +{ + int cur_element = 0; + // For all elements b[i] in list_b[] + while (cur_element < *list_a_size - 1) { + if (list_a[cur_element] == list_a[cur_element + 1]) { + for (int j = cur_element + 1; j < *list_a_size - 1; j++) { + list_a[j] = list_a[j + 1]; + } + *list_a_size = *list_a_size - 1; + } else { + cur_element++; + } + } +} + +static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) +{ + if (*mpc_combine_factor < limit) { + (*mpc_combine_factor)++; + return true; + } + + return false; +} + +static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, + int free_pipes) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i; + bool result = true; + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes is already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 + if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { + in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = + in_out->cfg_support_info->plane_support_info[i].dpps_used; + // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. + if (free_pipes > 0) { + if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, + pmo->mpc_combine_limit)) { + // We've reached max pipes allocatable to a single plane, so we fail. + result = false; + break; + } else { + // Successfully added another pipe to this failing plane. + free_pipes--; + } + } else { + // No free pipes to add. + result = false; + break; + } + } else { + // If the stream of this plane needs ODM combine, no further optimization can be done. 
+ result = false; + break; + } + } + } + + return result; +} + +static bool iterate_to_next_candidiate(struct dml2_pmo_instance *pmo, int size) +{ + int borrow_from, i; + bool success = false; + + if (pmo->scratch.pmo_dcn3.current_candidate[0] > 0) { + pmo->scratch.pmo_dcn3.current_candidate[0]--; + success = true; + } else { + for (borrow_from = 1; borrow_from < size && pmo->scratch.pmo_dcn3.current_candidate[borrow_from] == 0; borrow_from++) + ; + + if (borrow_from < size) { + pmo->scratch.pmo_dcn3.current_candidate[borrow_from]--; + for (i = 0; i < borrow_from; i++) { + pmo->scratch.pmo_dcn3.current_candidate[i] = pmo->scratch.pmo_dcn3.reserved_time_candidates_count[i] - 1; + } + + success = true; + } + } + + return success; +} + +static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) +{ + bool result = true; + + if (*odm_mode == dml2_odm_mode_auto) { + switch (odms_calculated) { + case 1: + *odm_mode = dml2_odm_mode_bypass; + break; + case 2: + *odm_mode = dml2_odm_mode_combine_2to1; + break; + case 3: + *odm_mode = dml2_odm_mode_combine_3to1; + break; + case 4: + *odm_mode = dml2_odm_mode_combine_4to1; + break; + default: + result = false; + break; + } + } + + if (result) { + if (*odm_mode == dml2_odm_mode_bypass) { + *odm_mode = dml2_odm_mode_combine_2to1; + } else if (*odm_mode == dml2_odm_mode_combine_2to1) { + *odm_mode = dml2_odm_mode_combine_3to1; + } else if (*odm_mode == dml2_odm_mode_combine_3to1) { + *odm_mode = dml2_odm_mode_combine_4to1; + } else { + result = false; + } + } + + return result; +} + +static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index) +{ + unsigned int i, count; + + count = 0; + for (i = 0; i < display_cfg->num_planes; i++) { + if (display_cfg->plane_descriptors[i].stream_index == stream_index) + count++; + } + + return count; +} + +static bool are_timings_trivially_synchronizable(struct display_configuation_with_meta *display_config, int mask) +{ + unsigned int i; + bool identical = true; + bool contains_drr = false; + unsigned int remap_array[DML2_MAX_PLANES]; + unsigned int remap_array_size = 0; + + // Create a remap array to enable simple iteration through only masked stream indicies + for (i = 0; i < display_config->display_config.num_streams; i++) { + if (mask & (0x1 << i)) { + remap_array[remap_array_size++] = i; + } + } + + // 0 or 1 display is always trivially synchronizable + if (remap_array_size <= 1) + return true; + + for (i = 1; i < remap_array_size; i++) { + if (memcmp(&display_config->display_config.stream_descriptors[remap_array[i - 1]].timing, + &display_config->display_config.stream_descriptors[remap_array[i]].timing, + sizeof(struct dml2_timing_cfg))) { + identical = false; + break; + } + } + + for (i = 0; i < remap_array_size; i++) { + if (display_config->display_config.stream_descriptors[remap_array[i]].timing.drr_config.enabled) { + contains_drr = true; + break; + } + } + + return !contains_drr && identical; +} + +bool pmo_dcn3_initialize(struct dml2_pmo_initialize_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + pmo->soc_bb = in_out->soc_bb; + pmo->ip_caps = in_out->ip_caps; + pmo->mpc_combine_limit = 2; + pmo->odm_combine_limit = 4; + pmo->min_clock_table_size = in_out->min_clock_table_size; + + pmo->options = in_out->options; + + return true; +} + +static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator) +{ + /* + * Htotal, Hblank start/end, and Hsync start/end all 
must be divisible + * in order for the horizontal timing params to be considered divisible + * by 2. Hsync start is always 0. + */ + unsigned long h_blank_start = timing->h_total - timing->h_front_porch; + + return (timing->h_total % denominator == 0) && + (h_blank_start % denominator == 0) && + (timing->h_blank_end % denominator == 0) && + (timing->h_sync_width % denominator == 0); +} + +static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type) +{ + switch (encoder_type) { + case dml2_dp: + case dml2_edp: + case dml2_dp2p0: + case dml2_none: + return true; + case dml2_hdmi: + case dml2_hdmifrl: + default: + return false; + } +} + +bool pmo_dcn3_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) +{ + unsigned int i; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + + if (in_out->instance->options->disable_dyn_odm || + (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) + return false; + + for (i = 0; i < display_config->num_planes; i++) + /* + * vmin optimization is required to be seamlessly switched off + * at any time when the new configuration is no longer + * supported. However switching from ODM combine to MPC combine + * is not always seamless. When there not enough free pipes, we + * will have to use the same secondary OPP heads as secondary + * DPP pipes in MPC combine in new state. This transition is + * expected to cause glitches. To avoid the transition, we only + * allow vmin optimization if the stream's base configuration + * doesn't require MPC combine. This condition checks if MPC + * combine is enabled. If so do not optimize the stream. + */ + if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && + mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) + in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + + for (i = 0; i < display_config->num_streams; i++) { + if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && + in_out->instance->options->disable_dyn_odm_for_stream_with_svp) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + /* + * ODM Combine requires horizontal timing divisible by 2 so each + * ODM segment has the same size. + */ + else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + /* + * Our hardware support seamless ODM transitions for DP encoders + * only. 
+ */ + else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + } + + return true; +} + +bool pmo_dcn3_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) +{ + bool is_vmin = true; + + if (in_out->vmin_limits->dispclk_khz > 0 && + in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) + is_vmin = false; + + return is_vmin; +} + +static int find_highest_odm_load_stream_index( + const struct dml2_display_cfg *display_config, + const struct dml2_core_mode_support_result *mode_support_result) +{ + unsigned int i; + int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; + + for (i = 0; i < display_config->num_streams; i++) { + odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz + / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; + if (odm_load > highest_odm_load) { + highest_odm_load_index = i; + highest_odm_load = odm_load; + } + } + + return highest_odm_load_index; +} + +bool pmo_dcn3_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) +{ + int stream_index; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + unsigned int odms_used; + struct dml2_stream_parameters *stream_descriptor; + bool optimizable = false; + + /* + * highest odm load stream must be optimizable to continue as dispclk is + * bounded by it. + */ + stream_index = find_highest_odm_load_stream_index(display_config, + mode_support_result); + + if (stream_index < 0 || + in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) + return false; + + odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; + if ((int)odms_used >= in_out->instance->odm_combine_limit) + return false; + + memcpy(in_out->optimized_display_config, + in_out->base_display_config, + sizeof(struct display_configuation_with_meta)); + + stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; + while (!optimizable && increase_odm_combine_factor( + &stream_descriptor->overrides.odm_mode, + odms_used)) { + switch (stream_descriptor->overrides.odm_mode) { + case dml2_odm_mode_combine_2to1: + optimizable = true; + break; + case dml2_odm_mode_combine_3to1: + /* + * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. + */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 3. + */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_combine_4to1: + /* + * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. + */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 4. 
+ */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + default: + break; + } + } + + return optimizable; +} + +bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i, used_pipes, free_pipes, planes_on_stream; + bool result; + + if (in_out->display_config != in_out->optimized_display_cfg) { + memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); + } + + //Count number of free pipes, and check if any odm combine is in use. + used_pipes = 0; + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; + } + free_pipes = pmo->ip_caps->pipe_count - used_pipes; + + // Optimization loop + // The goal here is to add more pipes to any planes + // which are failing mcache admissibility + result = true; + + // The optimization logic depends on whether ODM combine is enabled, and the stream count. + if (in_out->optimized_display_cfg->num_streams > 1) { + // If there are multiple streams, we are limited to only be able to optimize mcache failures on planes + // which are not ODM combined. + + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } else if (in_out->optimized_display_cfg->num_streams == 1) { + // In single stream cases, we still optimize mcache failures when there's ODM combine with some + // additional logic. + + if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { + // If ODM combine is enabled, then the logic is to increase ODM combine factor. + + // Optimization for streams with > 1 ODM combine factor is only supported for single display. + planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes is already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. + if (free_pipes >= planes_on_stream) { + if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, + in_out->cfg_support_info->plane_support_info[i].dpps_used)) { + result = false; + } else { + free_pipes -= planes_on_stream; + break; + } + } else { + result = false; + break; + } + } + } + } else { + // If ODM combine is not enabled, then we can actually use the same logic as before. 
+ + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } + } else { + result = true; + } + + return result; +} + +bool pmo_dcn3_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; + const struct dml2_stream_parameters *stream_descriptor; + const struct dml2_plane_parameters *plane_descriptor; + unsigned int stream_index, plane_index, candidate_count; + double min_reserved_vblank_time = 0; + int fclk_twait_needed_mask = 0x0; + int uclk_twait_needed_mask = 0x0; + + state->performed = true; + state->min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency; + pmo->scratch.pmo_dcn3.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + pmo->scratch.pmo_dcn3.max_latency_index = pmo->min_clock_table_size; + pmo->scratch.pmo_dcn3.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + + pmo->scratch.pmo_dcn3.stream_mask = 0xF; + + for (plane_index = 0; plane_index < in_out->base_display_config->display_config.num_planes; plane_index++) { + plane_descriptor = &in_out->base_display_config->display_config.plane_descriptors[plane_index]; + stream_descriptor = &in_out->base_display_config->display_config.stream_descriptors[plane_descriptor->stream_index]; + + if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[plane_index].active_latency_hiding_us < + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us && + stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate == dml2_twait_budgeting_setting_if_needed) + uclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate == dml2_twait_budgeting_setting_try) + uclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[plane_index].active_latency_hiding_us < + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us && + stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate == dml2_twait_budgeting_setting_if_needed) + fclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate == dml2_twait_budgeting_setting_try) + fclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index); + + if (plane_descriptor->overrides.legacy_svp_config != dml2_svp_mode_override_auto) { + pmo->scratch.pmo_dcn3.stream_mask &= ~(0x1 << plane_descriptor->stream_index); + } + } + + for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { + stream_descriptor = &in_out->base_display_config->display_config.stream_descriptors[stream_index]; + + // The absolute minimum required time is the minimum of all the required budgets + /* + if (stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate + == dml2_twait_budgeting_setting_require) + + if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + min_reserved_vblank_time = max_double2(min_reserved_vblank_time, + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us); + } + + if (stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate + == dml2_twait_budgeting_setting_require) { + + if 
(are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + min_reserved_vblank_time = max_double2(min_reserved_vblank_time, + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us); + } + } + + if (stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit + == dml2_twait_budgeting_setting_require) + min_reserved_vblank_time = max_double2(min_reserved_vblank_time, + in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us); + */ + + // Insert the absolute minimum into the array + candidate_count = 1; + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][0] = min_reserved_vblank_time; + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] = candidate_count; + + if (!(pmo->scratch.pmo_dcn3.stream_mask & (0x1 << stream_index))) + continue; + + // For every optional feature, we create a candidate for it only if it's larger minimum. + if ((fclk_twait_needed_mask & (0x1 << stream_index)) && + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us > min_reserved_vblank_time) { + + if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = + in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us; + } + } + + if ((uclk_twait_needed_mask & (0x1 << stream_index)) && + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us > min_reserved_vblank_time) { + + if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) { + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; + } + } + + if ((stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit == dml2_twait_budgeting_setting_try || + stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit == dml2_twait_budgeting_setting_if_needed) && + in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > min_reserved_vblank_time) { + + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] = + in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us; + } + + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] = candidate_count; + + // Finally sort the array of candidates + sort(pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index], + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index]); + + remove_duplicates(pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index], + &pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index]); + + pmo->scratch.pmo_dcn3.current_candidate[stream_index] = + pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] - 1; + } + + return true; +} + +bool pmo_dcn3_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i, stream_index; + + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { + stream_index = in_out->base_display_config->display_config.plane_descriptors[i].stream_index; + + if (in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < + 
pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][pmo->scratch.pmo_dcn3.current_candidate[stream_index]] * 1000) { + return false; + } + } + + return true; +} + +bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + unsigned int stream_index; + bool success = false; + bool reached_end = true; + + memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); + + if (in_out->last_candidate_failed) { + if (pmo->scratch.pmo_dcn3.cur_latency_index < pmo->scratch.pmo_dcn3.max_latency_index) { + // If we haven't tried all the clock bounds to support this state, try a higher one + pmo->scratch.pmo_dcn3.cur_latency_index++; + + success = true; + } else { + // If there's nothing higher to try, then we have to have a smaller canadidate + reached_end = !iterate_to_next_candidiate(pmo, in_out->optimized_display_config->display_config.num_streams); + + if (!reached_end) { + pmo->scratch.pmo_dcn3.cur_latency_index = pmo->scratch.pmo_dcn3.min_latency_index; + success = true; + } + } + } else { + success = true; + } + + if (success) { + in_out->optimized_display_config->stage3.min_clk_index_for_latency = pmo->scratch.pmo_dcn3.cur_latency_index; + + for (stream_index = 0; stream_index < in_out->optimized_display_config->display_config.num_streams; stream_index++) { + set_reserved_time_on_all_planes_with_stream_index(in_out->optimized_display_config, stream_index, + pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][pmo->scratch.pmo_dcn3.current_candidate[stream_index]]); + } + } + + return success; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h new file mode 100644 index 000000000000..cc350f88d4d2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML2_PMO_DCN3_H__ +#define __DML2_PMO_DCN3_H__ + +#include "dml2_internal_shared_types.h" + +bool pmo_dcn3_initialize(struct dml2_pmo_initialize_in_out *in_out); + +bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); + +bool pmo_dcn3_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); +bool pmo_dcn3_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); +bool pmo_dcn3_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); + +bool pmo_dcn3_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); +bool pmo_dcn3_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); +bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c new file mode 100644 index 000000000000..34d991d44e73 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c @@ -0,0 +1,1250 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
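The p-state search in pmo_dcn3_optimize_for_pstate_support() above walks the per-stream reserved-vblank-time candidates like a mixed-radix counter: each stream starts at its largest candidate, and when a combination fails at every clock latency index, iterate_to_next_candidiate() decrements stream 0's index, borrowing from the next stream and resetting all lower streams back to their largest candidate when it underflows. The stand-alone sketch below only demonstrates that counting scheme for an assumed two-stream layout; the candidate counts are invented for the example.

#include <stdio.h>
#include <stdbool.h>

#define NUM_STREAMS 2

/* Assumed candidate counts per stream (example values only) */
static const int counts[NUM_STREAMS] = { 3, 2 };
/* Start at the largest candidate of each stream, as in init_for_pstate_support */
static int current[NUM_STREAMS] = { 2, 1 };

/* Mirrors the borrow logic of iterate_to_next_candidiate(): mixed-radix countdown */
static bool next_candidate(void)
{
	if (current[0] > 0) {
		current[0]--;
		return true;
	}
	for (int borrow = 1; borrow < NUM_STREAMS; borrow++) {
		if (current[borrow] > 0) {
			current[borrow]--;
			for (int i = 0; i < borrow; i++)
				current[i] = counts[i] - 1;
			return true;
		}
	}
	return false;	/* every combination has been tried */
}

int main(void)
{
	do {
		printf("try candidates {%d, %d}\n", current[0], current[1]);
	} while (next_candidate());
	return 0;
}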
+ + +#include "dml2_pmo_factory.h" +#include "dml2_pmo_dcn4.h" + +static const int MIN_VACTIVE_MARGIN_US = 100; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding +static const int SUBVP_DRR_MARGIN_US = 100; + +static const enum dml2_pmo_pstate_strategy full_strategy_list_1_display[][4] = { + // VActive Preferred + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then SVP + { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then VBlank + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Finally DRR + { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, +}; + +static const int full_strategy_list_1_display_size = sizeof(full_strategy_list_1_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); + +static const enum dml2_pmo_pstate_strategy full_strategy_list_2_display[][4] = { + // VActive only is preferred + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then VActive + VBlank + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then VBlank only + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then SVP + VBlank + { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then SVP + SVP + { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Finally DRR + DRR + { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, +}; + +static const int full_strategy_list_2_display_size = sizeof(full_strategy_list_2_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); + +static const enum dml2_pmo_pstate_strategy full_strategy_list_3_display[][4] = { + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // All VActive + + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 1 VBlank + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + +// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 2 VBlank +// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, +// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, 
dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + +// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // VActive + 3 VBlank +// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, +// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, + + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // All VBlank + + { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // All DRR +}; + +static const int full_strategy_list_3_display_size = sizeof(full_strategy_list_3_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); + +static const enum dml2_pmo_pstate_strategy full_strategy_list_4_display[][4] = { + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // All VActive + + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // VActive + 1 VBlank + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, + +// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // VActive + 2 VBlank +// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, +// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, +// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, +// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, +// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + +// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 3 VBlank +// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, +// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, +// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, + + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // All Vblank + + { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // All DRR +}; + +static 
const int full_strategy_list_4_display_size = sizeof(full_strategy_list_4_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); + +static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) +{ + bool result = true; + + if (*odm_mode == dml2_odm_mode_auto) { + switch (odms_calculated) { + case 1: + *odm_mode = dml2_odm_mode_bypass; + break; + case 2: + *odm_mode = dml2_odm_mode_combine_2to1; + break; + case 3: + *odm_mode = dml2_odm_mode_combine_3to1; + break; + case 4: + *odm_mode = dml2_odm_mode_combine_4to1; + break; + default: + result = false; + break; + } + } + + if (result) { + if (*odm_mode == dml2_odm_mode_bypass) { + *odm_mode = dml2_odm_mode_combine_2to1; + } else if (*odm_mode == dml2_odm_mode_combine_2to1) { + *odm_mode = dml2_odm_mode_combine_3to1; + } else if (*odm_mode == dml2_odm_mode_combine_3to1) { + *odm_mode = dml2_odm_mode_combine_4to1; + } else { + result = false; + } + } + + return result; +} + +static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) +{ + if (*mpc_combine_factor < limit) { + (*mpc_combine_factor)++; + return true; + } + + return false; +} + +static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index) +{ + unsigned int i; + int count; + + count = 0; + for (i = 0; i < display_cfg->num_planes; i++) { + if (display_cfg->plane_descriptors[i].stream_index == stream_index) + count++; + } + + return count; +} + +static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, + int free_pipes) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i; + bool result = true; + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes is already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 + if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { + in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = + in_out->cfg_support_info->plane_support_info[i].dpps_used; + // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. + if (free_pipes > 0) { + if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, + pmo->mpc_combine_limit)) { + // We've reached max pipes allocatable to a single plane, so we fail. + result = false; + break; + } else { + // Successfully added another pipe to this failing plane. + free_pipes--; + } + } else { + // No free pipes to add. + result = false; + break; + } + } else { + // If the stream of this plane needs ODM combine, no further optimization can be done. 
+ result = false; + break; + } + } + } + + return result; +} + +bool pmo_dcn4_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i, used_pipes, free_pipes, planes_on_stream; + bool result; + + if (in_out->display_config != in_out->optimized_display_cfg) { + memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); + } + + //Count number of free pipes, and check if any odm combine is in use. + used_pipes = 0; + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; + } + free_pipes = pmo->ip_caps->pipe_count - used_pipes; + + // Optimization loop + // The goal here is to add more pipes to any planes + // which are failing mcache admissibility + result = true; + + // The optimization logic depends on whether ODM combine is enabled, and the stream count. + if (in_out->optimized_display_cfg->num_streams > 1) { + // If there are multiple streams, we are limited to only be able to optimize mcache failures on planes + // which are not ODM combined. + + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } else if (in_out->optimized_display_cfg->num_streams == 1) { + // In single stream cases, we still optimize mcache failures when there's ODM combine with some + // additional logic. + + if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { + // If ODM combine is enabled, then the logic is to increase ODM combine factor. + + // Optimization for streams with > 1 ODM combine factor is only supported for single display. + planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes is already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. + if (free_pipes >= planes_on_stream) { + if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, + in_out->cfg_support_info->plane_support_info[i].dpps_used)) { + result = false; + } else { + free_pipes -= planes_on_stream; + break; + } + } else { + result = false; + break; + } + } + } + } else { + // If ODM combine is not enabled, then we can actually use the same logic as before. 
+ + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } + } else { + result = true; + } + + return result; +} + +bool pmo_dcn4_initialize(struct dml2_pmo_initialize_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + pmo->soc_bb = in_out->soc_bb; + pmo->ip_caps = in_out->ip_caps; + pmo->mpc_combine_limit = 2; + pmo->odm_combine_limit = 4; + pmo->min_clock_table_size = in_out->min_clock_table_size; + + pmo->fams_params.v1.subvp.fw_processing_delay_us = 10; + pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us = 50; + pmo->fams_params.v1.subvp.refresh_rate_limit_max = 175; + pmo->fams_params.v1.subvp.refresh_rate_limit_min = 0; + + pmo->options = in_out->options; + + return true; +} + +static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator) +{ + /* + * Htotal, Hblank start/end, and Hsync start/end all must be divisible + * in order for the horizontal timing params to be considered divisible + * by 2. Hsync start is always 0. + */ + unsigned long h_blank_start = timing->h_total - timing->h_front_porch; + + return (timing->h_total % denominator == 0) && + (h_blank_start % denominator == 0) && + (timing->h_blank_end % denominator == 0) && + (timing->h_sync_width % denominator == 0); +} + +static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type) +{ + switch (encoder_type) { + case dml2_dp: + case dml2_edp: + case dml2_dp2p0: + case dml2_none: + return true; + case dml2_hdmi: + case dml2_hdmifrl: + default: + return false; + } +} + +bool pmo_dcn4_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) +{ + unsigned int i; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + + if (in_out->instance->options->disable_dyn_odm || + (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) + return false; + + for (i = 0; i < display_config->num_planes; i++) + /* + * vmin optimization is required to be seamlessly switched off + * at any time when the new configuration is no longer + * supported. However switching from ODM combine to MPC combine + * is not always seamless. When there not enough free pipes, we + * will have to use the same secondary OPP heads as secondary + * DPP pipes in MPC combine in new state. This transition is + * expected to cause glitches. To avoid the transition, we only + * allow vmin optimization if the stream's base configuration + * doesn't require MPC combine. This condition checks if MPC + * combine is enabled. If so do not optimize the stream. 
+ */ + if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && + mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) + in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + + for (i = 0; i < display_config->num_streams; i++) { + if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && + in_out->instance->options->disable_dyn_odm_for_stream_with_svp) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + /* + * ODM Combine requires horizontal timing divisible by 2 so each + * ODM segment has the same size. + */ + else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + /* + * Our hardware support seamless ODM transitions for DP encoders + * only. + */ + else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + } + + return true; +} + +bool pmo_dcn4_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) +{ + bool is_vmin = true; + + if (in_out->vmin_limits->dispclk_khz > 0 && + in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) + is_vmin = false; + + return is_vmin; +} + +static int find_highest_odm_load_stream_index( + const struct dml2_display_cfg *display_config, + const struct dml2_core_mode_support_result *mode_support_result) +{ + unsigned int i; + int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; + + for (i = 0; i < display_config->num_streams; i++) { + odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz + / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; + if (odm_load > highest_odm_load) { + highest_odm_load_index = i; + highest_odm_load = odm_load; + } + } + + return highest_odm_load_index; +} + +bool pmo_dcn4_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) +{ + int stream_index; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + unsigned int odms_used; + struct dml2_stream_parameters *stream_descriptor; + bool optimizable = false; + + /* + * highest odm load stream must be optimizable to continue as dispclk is + * bounded by it. 
+ */ + stream_index = find_highest_odm_load_stream_index(display_config, + mode_support_result); + + if (stream_index < 0 || + in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) + return false; + + odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; + if ((int)odms_used >= in_out->instance->odm_combine_limit) + return false; + + memcpy(in_out->optimized_display_config, + in_out->base_display_config, + sizeof(struct display_configuation_with_meta)); + + stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; + while (!optimizable && increase_odm_combine_factor( + &stream_descriptor->overrides.odm_mode, + odms_used)) { + switch (stream_descriptor->overrides.odm_mode) { + case dml2_odm_mode_combine_2to1: + optimizable = true; + break; + case dml2_odm_mode_combine_3to1: + /* + * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. + */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 3. + */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_combine_4to1: + /* + * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. + */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 4. 
+ */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + default: + break; + } + } + + return optimizable; +} + +static bool are_timings_trivially_synchronizable(const struct display_configuation_with_meta *display_config, int mask) +{ + unsigned int i; + bool identical = true; + bool contains_drr = false; + unsigned int remap_array[DML2_MAX_PLANES]; + unsigned int remap_array_size = 0; + + // Create a remap array to enable simple iteration through only masked stream indicies + for (i = 0; i < display_config->display_config.num_streams; i++) { + if (mask & (0x1 << i)) { + remap_array[remap_array_size++] = i; + } + } + + // 0 or 1 display is always trivially synchronizable + if (remap_array_size <= 1) + return true; + + for (i = 1; i < remap_array_size; i++) { + if (memcmp(&display_config->display_config.stream_descriptors[remap_array[i - 1]].timing, + &display_config->display_config.stream_descriptors[remap_array[i]].timing, + sizeof(struct dml2_timing_cfg))) { + identical = false; + break; + } + } + + for (i = 0; i < remap_array_size; i++) { + if (display_config->display_config.stream_descriptors[remap_array[i]].timing.drr_config.enabled) { + contains_drr = true; + break; + } + } + + return !contains_drr && identical; +} + +static void set_bit_in_bitfield(unsigned int *bit_field, unsigned int bit_offset) +{ + *bit_field = *bit_field | (0x1 << bit_offset); +} + +static bool is_bit_set_in_bitfield(unsigned int bit_field, unsigned int bit_offset) +{ + if (bit_field & (0x1 << bit_offset)) + return true; + + return false; +} + +static bool are_all_timings_drr_enabled(const struct display_configuation_with_meta *display_config, int mask) +{ + unsigned char i; + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(mask, i)) { + if (!display_config->display_config.stream_descriptors[i].timing.drr_config.enabled) + return false; + } + } + + return true; +} + +static void insert_into_candidate_list(const enum dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) +{ + int stream_index; + + scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = true; + + for (stream_index = 0; stream_index < stream_count; stream_index++) { + scratch->pmo_dcn4.per_stream_pstate_strategy[scratch->pmo_dcn4.num_pstate_candidates][stream_index] = per_stream_pstate_strategy[stream_index]; + + if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank) + scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = false; + } + + scratch->pmo_dcn4.num_pstate_candidates++; +} + +static bool all_planes_match_strategy(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_strategy strategy) +{ + unsigned char i; + enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na; + + if (strategy == dml2_pmo_pstate_strategy_vactive) + matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive; + else if (strategy == dml2_pmo_pstate_strategy_vblank) + matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; + else if (strategy == dml2_pmo_pstate_strategy_fw_svp) + matching_strategy 
= dml2_uclk_pstate_change_strategy_force_mall_svp; + else if (strategy == dml2_pmo_pstate_strategy_fw_drr) + matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(plane_mask, i)) { + if (display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto && + display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != matching_strategy) + return false; + } + } + + return true; +} + +static bool subvp_subvp_schedulable(struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, + unsigned int *svp_stream_indicies, int svp_stream_count) +{ + struct dml2_pmo_scratch *s = &pmo->scratch; + int i; + int microschedule_lines, time_us, refresh_hz; + int max_microschedule_us = 0; + int vactive1_us, vactive2_us, vblank1_us, vblank2_us; + + const struct dml2_timing_cfg *svp_timing1 = 0; + const struct dml2_implicit_svp_meta *svp_meta1 = 0; + + const struct dml2_timing_cfg *svp_timing2 = 0; + + if (svp_stream_count <= 1) + return true; + else if (svp_stream_count > 2) + return false; + + /* Loop to calculate the maximum microschedule time between the two SubVP pipes, + * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. + */ + for (i = 0; i < svp_stream_count; i++) { + svp_timing1 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[i]].timing; + svp_meta1 = &s->pmo_dcn4.stream_svp_meta[svp_stream_indicies[i]]; + + microschedule_lines = svp_meta1->v_active; + + // Round up when calculating microschedule time (+ 1 at the end) + time_us = (int)((microschedule_lines * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000 + + pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us + pmo->fams_params.v1.subvp.fw_processing_delay_us + 1); + + if (time_us > max_microschedule_us) + max_microschedule_us = time_us; + + refresh_hz = (int)((double)(svp_timing1->pixel_clock_khz * 1000) / (svp_timing1->v_total * svp_timing1->h_total)); + + if (refresh_hz < pmo->fams_params.v1.subvp.refresh_rate_limit_min || + refresh_hz > pmo->fams_params.v1.subvp.refresh_rate_limit_max) { + return false; + } + } + + svp_timing1 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[0]].timing; + svp_meta1 = &s->pmo_dcn4.stream_svp_meta[svp_stream_indicies[0]]; + + vactive1_us = (int)((svp_timing1->v_active * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000); + + vblank1_us = (int)(((svp_timing1->v_total - svp_timing1->v_active) * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000); + + svp_timing2 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[1]].timing; + + vactive2_us = (int)((svp_timing2->v_active * svp_timing2->h_total) / (double)(svp_timing2->pixel_clock_khz * 1000) * 1000000); + + vblank2_us = (int)(((svp_timing2->v_total - svp_timing2->v_active) * svp_timing2->h_total) / (double)(svp_timing2->pixel_clock_khz * 1000) * 1000000); + + if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && + (vactive2_us - vblank1_us) / 2 > max_microschedule_us) + return true; + + return false; +} + +static bool validate_svp_cofunctionality(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, int svp_stream_mask) +{ + bool result = false; + unsigned int stream_index; + + unsigned int svp_stream_indicies[2] = { 0 }; + unsigned int 
svp_stream_count = 0; + + // Find the SVP streams, store only the first 2, but count all of them + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { + if (svp_stream_count < 2) + svp_stream_indicies[svp_stream_count] = stream_index; + + svp_stream_count++; + } + } + + if (svp_stream_count == 1) { + result = true; // 1 SVP is always co_functional + } else if (svp_stream_count == 2) { + result = subvp_subvp_schedulable(pmo, display_cfg, svp_stream_indicies, svp_stream_count); + } + + return result; +} + +static bool validate_drr_cofunctionality(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, int drr_stream_mask) +{ + unsigned int stream_index; + int drr_stream_count = 0; + + // Find the SVP streams and count all of them + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + if (is_bit_set_in_bitfield(drr_stream_mask, stream_index)) { + drr_stream_count++; + } + } + + return drr_stream_count <= 4; +} + +static bool validate_svp_drr_cofunctionality(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, int svp_stream_mask, int drr_stream_mask) +{ + unsigned int stream_index; + int drr_stream_count = 0; + int svp_stream_count = 0; + + int prefetch_us = 0; + int mall_region_us = 0; + int drr_frame_us = 0; // nominal frame time + int subvp_active_us = 0; + int stretched_drr_us = 0; + int drr_stretched_vblank_us = 0; + int max_vblank_mallregion = 0; + + const struct dml2_timing_cfg *svp_timing = 0; + const struct dml2_timing_cfg *drr_timing = 0; + const struct dml2_implicit_svp_meta *svp_meta = 0; + + bool schedulable = false; + + // Find the SVP streams and count all of them + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { + svp_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; + svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; + svp_stream_count++; + } + if (is_bit_set_in_bitfield(drr_stream_mask, stream_index)) { + drr_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; + drr_stream_count++; + } + } + + if (svp_stream_count == 1 && drr_stream_count == 1 && svp_timing != drr_timing) { + prefetch_us = (int)((svp_meta->v_total - svp_meta->v_front_porch) + * svp_timing->h_total / (double)(svp_timing->pixel_clock_khz * 1000) * 1000000 + + pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us); + + subvp_active_us = (int)(svp_timing->v_active * svp_timing->h_total / + (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); + + drr_frame_us = (int)(drr_timing->v_total * drr_timing->h_total / + (double)(drr_timing->pixel_clock_khz * 1000) * 1000000); + + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = (int)(svp_meta->v_active * svp_timing->h_total / + (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); + + stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; + + drr_stretched_vblank_us = (int)((drr_timing->v_total - drr_timing->v_active) * drr_timing->h_total / + (double)(drr_timing->pixel_clock_khz * 1000) * 1000000 + (stretched_drr_us - drr_frame_us)); + + max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? 
drr_stretched_vblank_us : mall_region_us; + + /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the + * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis + * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, + * and the max of (VBLANK blanking time, MALL region)). + */ + if (stretched_drr_us < (1 / (double)drr_timing->drr_config.min_refresh_uhz) * 1000000 * 1000000 && + subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) + schedulable = true; + } + + return schedulable; +} + +static bool validate_svp_vblank_cofunctionality(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, int svp_stream_mask, int vblank_stream_mask) +{ + unsigned int stream_index; + int vblank_stream_count = 0; + int svp_stream_count = 0; + + const struct dml2_timing_cfg *svp_timing = 0; + const struct dml2_timing_cfg *vblank_timing = 0; + const struct dml2_implicit_svp_meta *svp_meta = 0; + + int prefetch_us = 0; + int mall_region_us = 0; + int vblank_frame_us = 0; + int subvp_active_us = 0; + int vblank_blank_us = 0; + int max_vblank_mallregion = 0; + + bool schedulable = false; + + // Find the SVP streams and count all of them + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { + svp_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; + svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; + svp_stream_count++; + } + if (is_bit_set_in_bitfield(vblank_stream_mask, stream_index)) { + vblank_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; + vblank_stream_count++; + } + } + + if (svp_stream_count == 1 && vblank_stream_count > 0) { + // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe + // Also include the prefetch end to mallstart delay time + prefetch_us = (int)((svp_meta->v_total - svp_meta->v_front_porch) * svp_timing->h_total + / (double)(svp_timing->pixel_clock_khz * 1000) * 1000000 + + pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us); + + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = (int)(svp_meta->v_active * svp_timing->h_total / + (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); + + vblank_frame_us = (int)(vblank_timing->v_total * vblank_timing->h_total / + (double)(vblank_timing->pixel_clock_khz * 1000) * 1000000); + + vblank_blank_us = (int)((vblank_timing->v_total - vblank_timing->v_active) * vblank_timing->h_total / + (double)(vblank_timing->pixel_clock_khz * 1000) * 1000000); + + subvp_active_us = (int)(svp_timing->v_active * svp_timing->h_total / + (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); + + max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; + + // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, + // and the max of (VBLANK blanking time, MALL region) + // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] 
> 0) + if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) + schedulable = true; + } + return schedulable; +} + +static bool validate_drr_vblank_cofunctionality(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, int drr_stream_mask, int vblank_stream_mask) +{ + return false; +} + +static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[4]) +{ + struct dml2_pmo_scratch *s = &pmo->scratch; + + unsigned int stream_index = 0; + + unsigned int svp_count = 0; + unsigned int svp_stream_mask = 0; + unsigned int drr_count = 0; + unsigned int drr_stream_mask = 0; + unsigned int vactive_count = 0; + unsigned int vactive_stream_mask = 0; + unsigned int vblank_count = 0; + unsigned int vblank_stream_mask = 0; + + bool strategy_matches_forced_requirements = true; + + bool admissible = false; + + // Tabulate everything + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + + if (!all_planes_match_strategy(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], + per_stream_pstate_strategy[stream_index])) { + strategy_matches_forced_requirements = false; + break; + } + + if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp) { + svp_count++; + set_bit_in_bitfield(&svp_stream_mask, stream_index); + } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + drr_count++; + set_bit_in_bitfield(&drr_stream_mask, stream_index); + } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vactive) { + vactive_count++; + set_bit_in_bitfield(&vactive_stream_mask, stream_index); + } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank) { + vblank_count++; + set_bit_in_bitfield(&vblank_stream_mask, stream_index); + } + } + + if (!strategy_matches_forced_requirements) + return false; + + // Check for trivial synchronization for vblank + if (vblank_count > 0 && (pmo->options->disable_vblank || !are_timings_trivially_synchronizable(display_cfg, vblank_stream_mask))) + return false; + + if (svp_count > 0 && pmo->options->disable_svp) + return false; + + if (drr_count > 0 && (pmo->options->disable_drr_var || !are_all_timings_drr_enabled(display_cfg, drr_stream_mask))) + return false; + + // Validate for FAMS admissibiliy + if (svp_count == 0 && drr_count == 0) { + // No FAMS + admissible = true; + } else { + admissible = false; + if (svp_count > 0 && drr_count == 0 && vactive_count == 0 && vblank_count == 0) { + // All SVP + admissible = validate_svp_cofunctionality(pmo, display_cfg, svp_stream_mask); + } else if (svp_count == 0 && drr_count > 0 && vactive_count == 0 && vblank_count == 0) { + // All DRR + admissible = validate_drr_cofunctionality(pmo, display_cfg, drr_stream_mask); + } else if (svp_count > 0 && drr_count > 0 && vactive_count == 0 && vblank_count == 0) { + // SVP + DRR + admissible = validate_svp_drr_cofunctionality(pmo, display_cfg, svp_stream_mask, drr_stream_mask); + } else if (svp_count > 0 && drr_count == 0 && vactive_count == 0 && vblank_count > 0) { + // SVP + VBlank + admissible = validate_svp_vblank_cofunctionality(pmo, display_cfg, svp_stream_mask, vblank_stream_mask); + } else if (svp_count == 0 && drr_count > 0 && vactive_count == 0 && vblank_count > 0) { + // DRR + VBlank + admissible = 
validate_drr_vblank_cofunctionality(pmo, display_cfg, drr_stream_mask, vblank_stream_mask); + } + } + + return admissible; +} + +static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) +{ + unsigned char i; + int min_vactive_margin_us = 0xFFFFFFF; + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(plane_mask, i)) { + if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active < min_vactive_margin_us) + min_vactive_margin_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active; + } + } + + return min_vactive_margin_us; +} + +bool pmo_dcn4_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; + struct dml2_pmo_scratch *s = &pmo->scratch; + + struct display_configuation_with_meta *display_config; + const struct dml2_plane_parameters *plane_descriptor; + const enum dml2_pmo_pstate_strategy (*strategy_list)[4] = 0; + unsigned int strategy_list_size = 0; + unsigned int plane_index, stream_index, i; + + state->performed = true; + + display_config = in_out->base_display_config; + display_config->display_config.overrides.enable_subvp_implicit_pmo = true; + + memset(s, 0, sizeof(struct dml2_pmo_scratch)); + + pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + pmo->scratch.pmo_dcn4.max_latency_index = pmo->min_clock_table_size; + pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + + // First build the stream plane mask (array of bitfields indexed by stream, indicating plane mapping) + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + plane_descriptor = &display_config->display_config.plane_descriptors[plane_index]; + + set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index); + + state->pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; + } + + // Figure out which streams can do vactive, and also build up implicit SVP meta + for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { + if (get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= + MIN_VACTIVE_MARGIN_US) + set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index); + + s->pmo_dcn4.stream_svp_meta[stream_index].valid = true; + s->pmo_dcn4.stream_svp_meta[stream_index].v_active = + display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_active; + s->pmo_dcn4.stream_svp_meta[stream_index].v_total = + display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_total; + s->pmo_dcn4.stream_svp_meta[stream_index].v_front_porch = 1; + } + + switch (display_config->display_config.num_streams) { + case 1: + strategy_list = full_strategy_list_1_display; + strategy_list_size = full_strategy_list_1_display_size; + break; + case 2: + strategy_list = full_strategy_list_2_display; + strategy_list_size = full_strategy_list_2_display_size; + break; + case 3: + strategy_list = full_strategy_list_3_display; + strategy_list_size = full_strategy_list_3_display_size; + break; + case 4: + strategy_list = 
full_strategy_list_4_display; + strategy_list_size = full_strategy_list_4_display_size; + break; + default: + strategy_list_size = 0; + break; + } + + if (strategy_list_size == 0) + return false; + + s->pmo_dcn4.num_pstate_candidates = 0; + + for (i = 0; i < strategy_list_size && i < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { + if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, strategy_list[i])) { + insert_into_candidate_list(strategy_list[i], display_config->display_config.num_streams, s); + } + } + + if (s->pmo_dcn4.num_pstate_candidates > 0) { + // There's this funny case... + // If the first entry in the candidate list is all vactive, then we can consider it "tested", so the current index is 0 + // Otherwise the current index should be -1 because we run the optimization at least once + s->pmo_dcn4.cur_pstate_candidate = 0; + for (i = 0; i < display_config->display_config.num_streams; i++) { + if (s->pmo_dcn4.per_stream_pstate_strategy[0][i] != dml2_pmo_pstate_strategy_vactive) { + s->pmo_dcn4.cur_pstate_candidate = -1; + break; + } + } + return true; + } else { + return false; + } +} + +static void reset_display_configuration(struct display_configuation_with_meta *display_config) +{ + unsigned int plane_index; + unsigned int stream_index; + struct dml2_plane_parameters *plane; + + for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { + display_config->stage3.stream_svp_meta[stream_index].valid = false; + } + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + // Unset SubVP + plane->overrides.legacy_svp_config = dml2_svp_mode_override_auto; + + // Remove reserve time + plane->overrides.reserved_vblank_time_ns = 0; + + // Reset strategy to auto + plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_not_supported; + } +} + +static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) +{ + unsigned int plane_index; + struct dml2_plane_parameters *plane; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + // Setup DRR + plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_drr; + } + } +} + +static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) +{ + unsigned int plane_index; + int stream_index = -1; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom; + } + } + + if (stream_index >= 0) { + display_config->stage3.stream_svp_meta[stream_index].valid = true; + display_config->stage3.stream_svp_meta[stream_index].v_active = + display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_active; + 
display_config->stage3.stream_svp_meta[stream_index].v_total = + display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_total; + display_config->stage3.stream_svp_meta[stream_index].v_front_porch = 1; + } +} + +static void setup_planes_for_vblank_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) +{ + unsigned int plane_index; + struct dml2_plane_parameters *plane; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + // Setup reserve time + plane->overrides.reserved_vblank_time_ns = 400 * 1000; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; + } + } +} + +static void setup_planes_for_vactive_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) +{ + unsigned int plane_index; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; + } + } +} + +static bool setup_display_config(struct display_configuation_with_meta *display_config, struct dml2_pmo_scratch *scratch, int strategy_index) +{ + bool success = true; + unsigned int stream_index; + + reset_display_configuration(display_config); + + for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { + if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_na) { + success = false; + break; + } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vblank) { + setup_planes_for_vblank_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { + setup_planes_for_svp_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + setup_planes_for_drr_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vactive) { + setup_planes_for_vactive_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } + } + + return success; +} + +static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) +{ + int min_time_us = 0xFFFFFF; + unsigned int plane_index = 0; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + if (min_time_us > (display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000)) + min_time_us = display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; + } + } + return min_time_us; +} + +bool pmo_dcn4_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) +{ + bool p_state_supported = true; + unsigned int stream_index; + struct dml2_pmo_scratch *s = 
&in_out->instance->scratch; + + if (s->pmo_dcn4.cur_pstate_candidate < 0) + return false; + + for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { + + if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vactive) { + if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_US) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vblank) { + if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < + in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { + if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + if (!all_planes_match_strategy(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr)) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_na) { + p_state_supported = false; + break; + } + } + + return p_state_supported; +} + +bool pmo_dcn4_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) +{ + bool success = false; + struct dml2_pmo_scratch *s = &in_out->instance->scratch; + + memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); + + if (in_out->last_candidate_failed) { + if (s->pmo_dcn4.allow_state_increase_for_strategy[s->pmo_dcn4.cur_pstate_candidate] && + s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index) { + s->pmo_dcn4.cur_latency_index++; + + success = true; + } + } + + if (!success) { + s->pmo_dcn4.cur_latency_index = s->pmo_dcn4.min_latency_index; + s->pmo_dcn4.cur_pstate_candidate++; + + if (s->pmo_dcn4.cur_pstate_candidate < s->pmo_dcn4.num_pstate_candidates) { + success = true; + } + } + + if (success) { + in_out->optimized_display_config->stage3.min_clk_index_for_latency = s->pmo_dcn4.cur_latency_index; + setup_display_config(in_out->optimized_display_config, &in_out->instance->scratch, in_out->instance->scratch.pmo_dcn4.cur_pstate_candidate); + } + + return success; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h new file mode 100644 index 000000000000..09cacc933d21 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
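The functions above lean heavily on per-stream bitfields: pmo_dcn4_init_for_pstate_support() builds stream_plane_mask[] (bit i set means plane i belongs to that stream) via set_bit_in_bitfield(), and later passes those masks to helpers such as get_vactive_pstate_margin(). Below is a minimal, self-contained sketch of that bookkeeping, not part of the patch; the plane-to-stream mapping and the DEMO_MAX_PLANES name are hypothetical, and the two helpers simply mirror the ones defined in dml2_pmo_dcn4.c.

    #include <stdio.h>

    #define DEMO_MAX_PLANES 8

    static void set_bit_in_bitfield(unsigned int *bit_field, unsigned int bit_offset)
    {
        *bit_field |= (0x1u << bit_offset);
    }

    static int is_bit_set_in_bitfield(unsigned int bit_field, unsigned int bit_offset)
    {
        return (bit_field & (0x1u << bit_offset)) != 0;
    }

    int main(void)
    {
        /* Hypothetical mapping: planes 0 and 2 on stream 0, plane 1 on stream 1. */
        unsigned int plane_stream_index[DEMO_MAX_PLANES] = { 0, 1, 0 };
        unsigned int num_planes = 3;
        unsigned int stream_plane_mask[2] = { 0, 0 };
        unsigned int plane, stream;

        /* Build the per-stream plane masks, as init_for_pstate_support does. */
        for (plane = 0; plane < num_planes; plane++)
            set_bit_in_bitfield(&stream_plane_mask[plane_stream_index[plane]], plane);

        /* Consumers then iterate a mask to visit only the planes of one stream. */
        for (stream = 0; stream < 2; stream++)
            for (plane = 0; plane < DEMO_MAX_PLANES; plane++)
                if (is_bit_set_in_bitfield(stream_plane_mask[stream], plane))
                    printf("stream %u owns plane %u\n", stream, plane);

        return 0;
    }

Keeping the mapping as bitmasks lets the per-stream strategy checks (vactive margin, vblank reserve time, DRR matching) scan only the planes that actually belong to the stream being evaluated.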
+ + +#ifndef __DML2_PMO_DCN4_H__ +#define __DML2_PMO_DCN4_H__ + +#include "dml2_internal_shared_types.h" + +bool pmo_dcn4_initialize(struct dml2_pmo_initialize_in_out *in_out); + +bool pmo_dcn4_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); + +bool pmo_dcn4_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); +bool pmo_dcn4_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); +bool pmo_dcn4_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); + +bool pmo_dcn4_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); +bool pmo_dcn4_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); +bool pmo_dcn4_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); + +bool pmo_dcn4_unit_test(void); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c new file mode 100644 index 000000000000..214411ab46df --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -0,0 +1,2060 @@ +/* +* Copyright 2022 Advanced Micro Devices, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +* OTHER DEALINGS IN THE SOFTWARE. 
+* +* Authors: AMD +* +*/ + +#include "dml2_pmo_factory.h" +#include "dml2_pmo_dcn4.h" +#include "dml2_debug.h" +#include "lib_float_math.h" +#include "dml2_pmo_dcn4_fams2.h" + +#define PMO_DCN4_MIN_TIME_TO_DISALLOW_MS 0.0 + +static const double MIN_VACTIVE_MARGIN_PCT = 0.25; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding + +static const enum dml2_pmo_pstate_strategy base_strategy_list_1_display[][PMO_DCN4_MAX_DISPLAYS] = { + // VActive Preferred + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then SVP + { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then VBlank + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Finally DRR + { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, +}; + +static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); + +static const enum dml2_pmo_pstate_strategy base_strategy_list_2_display[][PMO_DCN4_MAX_DISPLAYS] = { + // VActive only is preferred + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then VActive + VBlank + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then VBlank only + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then SVP + VBlank + { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then SVP + DRR + { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then SVP + SVP + { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then DRR + VActive + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Then DRR + VBlank + //{ dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + + // Finally DRR + DRR + { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, +}; + +static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); + +static const enum dml2_pmo_pstate_strategy base_strategy_list_3_display[][PMO_DCN4_MAX_DISPLAYS] = { + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // All VActive + + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // VActive + 1 VBlank + + //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 2 VBlank + + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, 
dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // All VBlank + + //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // VBlank + 1 DRR + + //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // VBlank + 2 DRR + + { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // All DRR +}; + +static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); + +static const enum dml2_pmo_pstate_strategy base_strategy_list_4_display[][PMO_DCN4_MAX_DISPLAYS] = { + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // All VActive + + { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, // VActive + 1 VBlank + + //{ dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 2 VBlank + + //{ dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 3 VBlank + + { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // All Vblank + + //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 1 DRR + + //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 2 DRR + + //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 3 DRR + + { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // All DRR +}; + +static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); + + +static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) +{ + bool result = true; + + if (*odm_mode == dml2_odm_mode_auto) { + switch (odms_calculated) { + case 1: + *odm_mode = dml2_odm_mode_bypass; + break; + case 2: + *odm_mode = dml2_odm_mode_combine_2to1; + break; + case 3: + *odm_mode = dml2_odm_mode_combine_3to1; + break; + case 4: + *odm_mode = dml2_odm_mode_combine_4to1; + break; + default: + result = false; + break; + } + } + + if (result) { + if (*odm_mode == dml2_odm_mode_bypass) { + *odm_mode = dml2_odm_mode_combine_2to1; + } else if (*odm_mode == dml2_odm_mode_combine_2to1) { + *odm_mode = dml2_odm_mode_combine_3to1; + } else if (*odm_mode == dml2_odm_mode_combine_3to1) { + *odm_mode = dml2_odm_mode_combine_4to1; + } else { + result = false; + } + } + + return result; +} + +static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) +{ + if (*mpc_combine_factor < limit) { + (*mpc_combine_factor)++; + return true; + } + + return false; +} + +static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int 
stream_index) +{ + unsigned int i, count; + + count = 0; + for (i = 0; i < display_cfg->num_planes; i++) { + if (display_cfg->plane_descriptors[i].stream_index == stream_index) + count++; + } + + return count; +} + +static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, + int free_pipes) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i; + bool result = true; + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes is already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 + if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { + in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = + in_out->cfg_support_info->plane_support_info[i].dpps_used; + // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. + if (free_pipes > 0) { + if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, + pmo->mpc_combine_limit)) { + // We've reached max pipes allocatable to a single plane, so we fail. + result = false; + break; + } else { + // Successfully added another pipe to this failing plane. + free_pipes--; + } + } else { + // No free pipes to add. + result = false; + break; + } + } else { + // If the stream of this plane needs ODM combine, no further optimization can be done. + result = false; + break; + } + } + } + + return result; +} + +bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i, used_pipes, free_pipes, planes_on_stream; + bool result; + + if (in_out->display_config != in_out->optimized_display_cfg) { + memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); + } + + //Count number of free pipes, and check if any odm combine is in use. + used_pipes = 0; + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; + } + free_pipes = pmo->ip_caps->pipe_count - used_pipes; + + // Optimization loop + // The goal here is to add more pipes to any planes + // which are failing mcache admissibility + result = true; + + // The optimization logic depends on whether ODM combine is enabled, and the stream count. + if (in_out->optimized_display_cfg->num_streams > 1 || in_out->instance->options->disable_dyn_odm) { + // If there are multiple streams, we are limited to only be able to optimize mcache failures on planes + // which are not ODM combined. + + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } else if (in_out->optimized_display_cfg->num_streams == 1) { + // In single stream cases, we still optimize mcache failures when there's ODM combine with some + // additional logic. + + if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { + // If ODM combine is enabled, then the logic is to increase ODM combine factor. + + // Optimization for streams with > 1 ODM combine factor is only supported for single display. 
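/*
 * Editor's illustrative note, not part of the original patch: raising the ODM
 * combine factor costs one extra pipe per plane on the stream, which is why
 * free_pipes is compared against planes_on_stream below. Hypothetical example:
 * on an 8-pipe part, one stream with 2 planes at ODM 2:1 uses 4 pipes; moving
 * to ODM 3:1 needs 2 more pipes (one per plane), leaving 2 pipes free.
 */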
+ planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); + + for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { + // For pipes that failed dcc mcache check, we want to increase the pipe count. + // The logic for doing this depends on how many pipes is already being used, + // and whether it's mpcc or odm combine. + if (!in_out->dcc_mcache_supported[i]) { + // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. + if (free_pipes >= planes_on_stream) { + if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, + in_out->cfg_support_info->plane_support_info[i].dpps_used)) { + result = false; + } else { + free_pipes -= planes_on_stream; + break; + } + } else { + result = false; + break; + } + } + } + } else { + // If ODM combine is not enabled, then we can actually use the same logic as before. + + result = optimize_dcc_mcache_no_odm(in_out, free_pipes); + } + } else { + result = true; + } + + return result; +} + +static enum dml2_pmo_pstate_strategy convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_strategy base_strategy) +{ + enum dml2_pmo_pstate_strategy variant_strategy = 0; + + switch (base_strategy) { + case dml2_pmo_pstate_strategy_vactive: + variant_strategy = dml2_pmo_pstate_strategy_fw_vactive_drr; + break; + case dml2_pmo_pstate_strategy_vblank: + variant_strategy = dml2_pmo_pstate_strategy_fw_vblank_drr; + break; + case dml2_pmo_pstate_strategy_fw_svp: + variant_strategy = dml2_pmo_pstate_strategy_fw_svp_drr; + break; + case dml2_pmo_pstate_strategy_fw_vactive_drr: + case dml2_pmo_pstate_strategy_fw_vblank_drr: + case dml2_pmo_pstate_strategy_fw_svp_drr: + case dml2_pmo_pstate_strategy_fw_drr: + case dml2_pmo_pstate_strategy_reserved_hw: + case dml2_pmo_pstate_strategy_reserved_fw: + case dml2_pmo_pstate_strategy_reserved_fw_drr_fixed: + case dml2_pmo_pstate_strategy_reserved_fw_drr_var: + case dml2_pmo_pstate_strategy_na: + default: + /* no variant for this mode */ + variant_strategy = base_strategy; + } + + return variant_strategy; +} + +static enum dml2_pmo_pstate_strategy(*get_expanded_strategy_list( + struct dml2_pmo_init_data *init_data, + int stream_count))[PMO_DCN4_MAX_DISPLAYS] +{ + enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + + switch (stream_count) { + case 1: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_1_display; + break; + case 2: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_2_display; + break; + case 3: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_3_display; + break; + case 4: + expanded_strategy_list = init_data->pmo_dcn4.expanded_strategy_list_4_display; + break; + default: + break; + } + + return expanded_strategy_list; +} + +static unsigned int get_num_expanded_strategies( + struct dml2_pmo_init_data *init_data, + int stream_count) +{ + return init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]; +} + +static void insert_strategy_into_expanded_list( + const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS], + int stream_count, + struct dml2_pmo_init_data *init_data) +{ + enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + + expanded_strategy_list = get_expanded_strategy_list(init_data, stream_count); + + if (expanded_strategy_list) { + 
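/*
 * Editor's illustrative note, not part of the original patch: the strategy is
 * copied into the next free slot and the per-stream-count counter is
 * post-incremented, so this behaves as a simple append. expand_base_strategy()
 * calls it once per distinct ordering, e.g. a 2-display base entry of
 * { vactive, vblank } is appended as both { vactive, vblank } and
 * { vblank, vactive }.
 */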
memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++], + per_stream_pstate_strategy, + sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); + } +} + +static void expand_base_strategy(struct dml2_pmo_instance *pmo, + const enum dml2_pmo_pstate_strategy base_strategy_list[PMO_DCN4_MAX_DISPLAYS], + unsigned int stream_count) +{ + bool skip_to_next_stream; + bool expanded_strategy_added; + bool skip_iteration; + unsigned int i, j; + unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int stream_iteration_indices[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + enum dml2_pmo_pstate_strategy cur_strategy_list[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + + /* determine number of displays per method */ + for (i = 0; i < stream_count; i++) { + /* increment the count of the earliest index with the same method */ + for (j = 0; j < stream_count; j++) { + if (base_strategy_list[i] == base_strategy_list[j]) { + num_streams_per_method[j] = num_streams_per_method[j] + 1; + break; + } + } + } + + i = 0; + /* uses a while loop instead of recursion to build permutations of base strategy */ + while (stream_iteration_indices[0] < stream_count) { + skip_to_next_stream = false; + expanded_strategy_added = false; + skip_iteration = false; + + /* determine what to do for this iteration */ + if (stream_iteration_indices[i] < stream_count && num_streams_per_method[stream_iteration_indices[i]] != 0) { + /* decrement count and assign method */ + cur_strategy_list[i] = base_strategy_list[stream_iteration_indices[i]]; + num_streams_per_method[stream_iteration_indices[i]] -= 1; + + if (i >= stream_count - 1) { + /* insert into strategy list */ + insert_strategy_into_expanded_list(cur_strategy_list, stream_count, &pmo->init_data); + expanded_strategy_added = true; + } else { + /* skip to next stream */ + skip_to_next_stream = true; + } + } else { + skip_iteration = true; + } + + /* prepare for next iteration */ + if (skip_to_next_stream) { + i++; + } else { + /* restore count */ + if (!skip_iteration) { + num_streams_per_method[stream_iteration_indices[i]] += 1; + } + + /* increment iteration count */ + stream_iteration_indices[i]++; + + /* if iterations are complete, or last stream was reached */ + if ((stream_iteration_indices[i] >= stream_count || expanded_strategy_added) && i > 0) { + /* reset per stream index, decrement i */ + stream_iteration_indices[i] = 0; + i--; + + /* restore previous stream's count and increment index */ + num_streams_per_method[stream_iteration_indices[i]] += 1; + stream_iteration_indices[i]++; + } + } + } +} + +static void expand_variant_strategy(struct dml2_pmo_instance *pmo, + const enum dml2_pmo_pstate_strategy base_strategy_list[PMO_DCN4_MAX_DISPLAYS], + unsigned int stream_count) +{ + unsigned int i; + + bool variant_found = false; + enum dml2_pmo_pstate_strategy cur_strategy_list[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + + /* setup variant list as base to start */ + memcpy(cur_strategy_list, base_strategy_list, sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); + + for (i = 0; i < stream_count; i++) { + cur_strategy_list[i] = convert_strategy_to_drr_variant(base_strategy_list[i]); + + if (cur_strategy_list[i] != base_strategy_list[i]) { + variant_found = true; + } + + if (i == stream_count - 1 && variant_found) { + insert_strategy_into_expanded_list(cur_strategy_list, stream_count, &pmo->init_data); + } + } +} + +static void expand_base_strategies( + struct dml2_pmo_instance *pmo, + const enum 
dml2_pmo_pstate_strategy(*base_strategies_list)[PMO_DCN4_MAX_DISPLAYS], + const unsigned int num_base_strategies, + unsigned int stream_count) +{ + unsigned int i; + unsigned int num_pre_variant_strategies; + enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS]; + + /* expand every explicit base strategy (except all DRR) */ + for (i = 0; i < num_base_strategies - 1; i++) { + expand_base_strategy(pmo, base_strategies_list[i], stream_count); + } + + if (stream_count > 1) { + /* expand base strategies to DRR variants */ + num_pre_variant_strategies = get_num_expanded_strategies(&pmo->init_data, stream_count); + expanded_strategy_list = get_expanded_strategy_list(&pmo->init_data, stream_count); + for (i = 0; i < num_pre_variant_strategies; i++) { + expand_variant_strategy(pmo, expanded_strategy_list[i], stream_count); + } + } + + /* add back all DRR */ + insert_strategy_into_expanded_list(base_strategies_list[num_base_strategies - 1], stream_count, &pmo->init_data); +} + +bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) +{ + int i = 0; + struct dml2_pmo_instance *pmo = in_out->instance; + + pmo->soc_bb = in_out->soc_bb; + pmo->ip_caps = in_out->ip_caps; + pmo->mpc_combine_limit = 2; + pmo->odm_combine_limit = 4; + pmo->min_clock_table_size = in_out->min_clock_table_size; + + pmo->fams_params.v2.subvp.refresh_rate_limit_max = 175; + pmo->fams_params.v2.subvp.refresh_rate_limit_min = 0; + pmo->fams_params.v2.drr.refresh_rate_limit_max = 1000; + pmo->fams_params.v2.drr.refresh_rate_limit_min = 119; + + pmo->options = in_out->options; + + /* generate permutations of p-state configs from base strategy list */ + for (i = 1; i <= PMO_DCN4_MAX_DISPLAYS; i++) { + switch (i) { + case 1: + DML2_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + + /* populate list */ + expand_base_strategies(pmo, base_strategy_list_1_display, base_strategy_list_1_display_size, 1); + break; + case 2: + DML2_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + + /* populate list */ + expand_base_strategies(pmo, base_strategy_list_2_display, base_strategy_list_2_display_size, 2); + break; + case 3: + DML2_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + + /* populate list */ + expand_base_strategies(pmo, base_strategy_list_3_display, base_strategy_list_3_display_size, 3); + break; + case 4: + DML2_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + + /* populate list */ + expand_base_strategies(pmo, base_strategy_list_4_display, base_strategy_list_4_display_size, 4); + break; + default: + break; + } + } + + return true; +} + +static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator) +{ + /* + * Htotal, Hblank start/end, and Hsync start/end all must be divisible + * in order for the horizontal timing params to be considered divisible + * by 2. Hsync start is always 0. 
+ */ + unsigned long h_blank_start = timing->h_total - timing->h_front_porch; + + return (timing->h_total % denominator == 0) && + (h_blank_start % denominator == 0) && + (timing->h_blank_end % denominator == 0) && + (timing->h_sync_width % denominator == 0); +} + +static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type) +{ + switch (encoder_type) { + case dml2_dp: + case dml2_edp: + case dml2_dp2p0: + case dml2_none: + return true; + case dml2_hdmi: + case dml2_hdmifrl: + default: + return false; + } +} + +bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) +{ + unsigned int i; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + + if (in_out->instance->options->disable_dyn_odm || + (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) + return false; + + for (i = 0; i < display_config->num_planes; i++) + /* + * vmin optimization is required to be seamlessly switched off + * at any time when the new configuration is no longer + * supported. However switching from ODM combine to MPC combine + * is not always seamless. When there are not enough free pipes, we + * will have to use the same secondary OPP heads as secondary + * DPP pipes in MPC combine in the new state. This transition is + * expected to cause glitches. To avoid the transition, we only + * allow vmin optimization if the stream's base configuration + * doesn't require MPC combine. This condition checks if MPC + * combine is enabled. If so, do not optimize the stream. + */ + if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && + mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) + in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + + for (i = 0; i < display_config->num_streams; i++) { + if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && + in_out->instance->options->disable_dyn_odm_for_stream_with_svp) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + /* + * ODM Combine requires horizontal timing divisible by 2 so each + * ODM segment has the same size. + */ + else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + /* + * Our hardware supports seamless ODM transitions for DP encoders + * only. 
+ */ + else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) + in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + } + + return true; +} + +bool pmo_dcn4_fams2_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) +{ + bool is_vmin = true; + + if (in_out->vmin_limits->dispclk_khz > 0 && + in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) + is_vmin = false; + + return is_vmin; +} + +static int find_highest_odm_load_stream_index( + const struct dml2_display_cfg *display_config, + const struct dml2_core_mode_support_result *mode_support_result) +{ + unsigned int i; + int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; + + for (i = 0; i < display_config->num_streams; i++) { + odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz + / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; + if (odm_load > highest_odm_load) { + highest_odm_load_index = i; + highest_odm_load = odm_load; + } + } + + return highest_odm_load_index; +} + +bool pmo_dcn4_fams2_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) +{ + int stream_index; + const struct dml2_display_cfg *display_config = + &in_out->base_display_config->display_config; + const struct dml2_core_mode_support_result *mode_support_result = + &in_out->base_display_config->mode_support_result; + unsigned int odms_used; + struct dml2_stream_parameters *stream_descriptor; + bool optimizable = false; + + /* + * highest odm load stream must be optimizable to continue as dispclk is + * bounded by it. + */ + stream_index = find_highest_odm_load_stream_index(display_config, + mode_support_result); + + if (stream_index < 0 || + in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) + return false; + + odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; + if ((int)odms_used >= in_out->instance->odm_combine_limit) + return false; + + memcpy(in_out->optimized_display_config, + in_out->base_display_config, + sizeof(struct display_configuation_with_meta)); + + stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; + while (!optimizable && increase_odm_combine_factor( + &stream_descriptor->overrides.odm_mode, + odms_used)) { + switch (stream_descriptor->overrides.odm_mode) { + case dml2_odm_mode_combine_2to1: + optimizable = true; + break; + case dml2_odm_mode_combine_3to1: + /* + * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. + */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 3. + */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_combine_4to1: + /* + * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of + * actual pixel rate. Therefore horizontal timing must + * be divisible by 4. + */ + if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { + /* + * DSC h slice count must be divisible + * by 4. 
+ */ + if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) + optimizable = true; + } else { + optimizable = true; + } + } + break; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + default: + break; + } + } + + return optimizable; +} + +static void set_bit_in_bitfield(unsigned int *bit_field, unsigned int bit_offset) +{ + *bit_field = *bit_field | (0x1 << bit_offset); +} + +static bool is_bit_set_in_bitfield(unsigned int bit_field, unsigned int bit_offset) +{ + if (bit_field & (0x1 << bit_offset)) + return true; + + return false; +} + +static void build_synchronized_timing_groups( + struct dml2_pmo_instance *pmo, + struct display_configuation_with_meta *display_config) +{ + unsigned int i, j; + struct dml2_timing_cfg *master_timing; + + unsigned int stream_mapped_mask = 0; + unsigned int num_timing_groups = 0; + unsigned int timing_group_idx = 0; + struct dml2_pmo_scratch *s = &pmo->scratch; + + /* clear all group masks */ + memset(s->pmo_dcn4.synchronized_timing_group_masks, 0, sizeof(s->pmo_dcn4.synchronized_timing_group_masks)); + memset(s->pmo_dcn4.group_is_drr_enabled, 0, sizeof(s->pmo_dcn4.group_is_drr_enabled)); + memset(s->pmo_dcn4.group_line_time_us, 0, sizeof(s->pmo_dcn4.group_line_time_us)); + s->pmo_dcn4.num_timing_groups = 0; + + for (i = 0; i < display_config->display_config.num_streams; i++) { + master_timing = &display_config->display_config.stream_descriptors[i].timing; + + /* only need to build group if this stream is not in a group already */ + if (is_bit_set_in_bitfield(stream_mapped_mask, i)) { + continue; + } + set_bit_in_bitfield(&stream_mapped_mask, i); + timing_group_idx = num_timing_groups; + num_timing_groups++; + + /* trivially set default timing group to itself */ + set_bit_in_bitfield(&s->pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i); + s->pmo_dcn4.group_line_time_us[timing_group_idx] = (double)master_timing->h_total / master_timing->pixel_clock_khz * 1000.0; + + /* if drr is in use, timing is not synchronizable */ + if (master_timing->drr_config.enabled) { + s->pmo_dcn4.group_is_drr_enabled[timing_group_idx] = true; + continue; + } + + /* find synchronizable timing groups */ + for (j = i + 1; j < display_config->display_config.num_streams; j++) { + if (memcmp(master_timing, + &display_config->display_config.stream_descriptors[j].timing, + sizeof(struct dml2_timing_cfg)) == 0) { + set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); + set_bit_in_bitfield(&stream_mapped_mask, j); + } + } + } + + s->pmo_dcn4.num_timing_groups = num_timing_groups; +} + +static bool all_timings_support_vactive(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + unsigned char i; + bool valid = true; + + // Create a remap array to enable simple iteration through only masked stream indices + for (i = 0; i < display_config->display_config.num_streams; i++) { + if (is_bit_set_in_bitfield(mask, i)) { + /* check if stream has enough vactive margin */ + valid &= is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.stream_vactive_capability_mask, i); + } + } + + return valid; +} + +static bool all_timings_support_vblank(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + unsigned int i; + + bool synchronizable = true; + + /* find first 
vblank stream index and compare the timing group mask */ + for (i = 0; i < display_config->display_config.num_streams; i++) { + if (is_bit_set_in_bitfield(mask, i)) { + if (mask != pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[i]) { + /* vblank streams are not synchronizable */ + synchronizable = false; + } + break; + } + } + + return synchronizable; +} + +static unsigned int calc_svp_microschedule(const struct dml2_fams2_meta *fams2_meta) +{ + return fams2_meta->contention_delay_otg_vlines + + fams2_meta->method_subvp.programming_delay_otg_vlines + + fams2_meta->method_subvp.phantom_vtotal + + fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + + fams2_meta->dram_clk_change_blackout_otg_vlines; +} + +static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + unsigned char i; + for (i = 0; i < DML2_MAX_PLANES; i++) { + const struct dml2_stream_parameters *stream_descriptor; + const struct dml2_fams2_meta *stream_fams2_meta; + + if (is_bit_set_in_bitfield(mask, i)) { + stream_descriptor = &display_config->display_config.stream_descriptors[i]; + stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[i]; + + if (!stream_descriptor->timing.drr_config.enabled) + return false; + + /* cannot support required vtotal */ + if (stream_fams2_meta->method_drr.stretched_vtotal > stream_fams2_meta->max_vtotal) { + return false; + } + + /* check rr is within bounds */ + if (stream_fams2_meta->nom_refresh_rate_hz < pmo->fams_params.v2.drr.refresh_rate_limit_min || + stream_fams2_meta->nom_refresh_rate_hz > pmo->fams_params.v2.drr.refresh_rate_limit_max) { + return false; + } + + /* check required stretch is allowed */ + if (stream_descriptor->timing.drr_config.max_instant_vtotal_delta > 0 && + stream_fams2_meta->method_drr.stretched_vtotal - stream_fams2_meta->nom_vtotal > stream_descriptor->timing.drr_config.max_instant_vtotal_delta) { + return false; + } + } + } + + return true; +} + +static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_config, + unsigned int mask) +{ + const struct dml2_stream_parameters *stream_descriptor; + const struct dml2_plane_parameters *plane_descriptor; + const struct dml2_fams2_meta *stream_fams2_meta; + unsigned int microschedule_vlines; + unsigned int i; + + unsigned int num_planes_per_stream[DML2_MAX_PLANES] = { 0 }; + + /* confirm timing it is not a centered timing */ + for (i = 0; i < display_config->display_config.num_planes; i++) { + plane_descriptor = &display_config->display_config.plane_descriptors[i]; + + if (is_bit_set_in_bitfield(mask, (unsigned char)plane_descriptor->stream_index)) { + num_planes_per_stream[plane_descriptor->stream_index]++; + + /* check recout height covers entire otg vactive, and single plane */ + if (num_planes_per_stream[plane_descriptor->stream_index] > 1 || + !plane_descriptor->composition.rect_out_height_spans_vactive) { + return false; + } + } + } + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(mask, i)) { + stream_descriptor = &display_config->display_config.stream_descriptors[i]; + stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[i]; + + if (stream_descriptor->overrides.disable_subvp) { + return false; + } + + microschedule_vlines = calc_svp_microschedule(&pmo->scratch.pmo_dcn4.stream_fams2_meta[i]); + + /* block if using an interlaced timing */ + if (stream_descriptor->timing.interlaced) { + return 
false; + } + + /* 1) svp main stream's vactive must be able to fit the microschedule + * 2) refresh rate must be within the allowed bounds + */ + if (microschedule_vlines >= stream_descriptor->timing.v_active || + (stream_fams2_meta->nom_refresh_rate_hz < pmo->fams_params.v2.subvp.refresh_rate_limit_min || + stream_fams2_meta->nom_refresh_rate_hz > pmo->fams_params.v2.subvp.refresh_rate_limit_max)) { + return false; + } + } + } + + return true; +} + +static void insert_into_candidate_list(const enum dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) +{ + int stream_index; + + scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = true; + + for (stream_index = 0; stream_index < stream_count; stream_index++) { + scratch->pmo_dcn4.per_stream_pstate_strategy[scratch->pmo_dcn4.num_pstate_candidates][stream_index] = per_stream_pstate_strategy[stream_index]; + + if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank || + per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) + scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = false; + } + + scratch->pmo_dcn4.num_pstate_candidates++; +} + +static bool all_planes_match_strategy(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_strategy strategy) +{ + unsigned char i; + enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na; + + if (strategy == dml2_pmo_pstate_strategy_vactive || strategy == dml2_pmo_pstate_strategy_fw_vactive_drr) + matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive; + else if (strategy == dml2_pmo_pstate_strategy_vblank || strategy == dml2_pmo_pstate_strategy_fw_vblank_drr) + matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; + else if (strategy == dml2_pmo_pstate_strategy_fw_svp) + matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; + else if (strategy == dml2_pmo_pstate_strategy_fw_drr) + matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(plane_mask, i)) { + if (display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto && + display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != matching_strategy) + return false; + } + } + + return true; +} + +static void build_method_scheduling_params( + struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta, + struct dml2_fams2_meta *stream_fams2_meta) +{ + stream_method_fams2_meta->allow_time_us = + (double)(stream_method_fams2_meta->allow_end_otg_vline - stream_method_fams2_meta->allow_start_otg_vline) * + stream_fams2_meta->otg_vline_time_us; + if (stream_method_fams2_meta->allow_time_us >= stream_method_fams2_meta->period_us) { + /* when allow wave overlaps an entire frame, it is always schedulable (DRR can do this)*/ + stream_method_fams2_meta->disallow_time_us = 0.0; + } else { + stream_method_fams2_meta->disallow_time_us = + stream_method_fams2_meta->period_us - stream_method_fams2_meta->allow_time_us; + } +} + +static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( + struct dml2_pmo_instance *pmo, + enum dml2_pmo_pstate_strategy stream_pstate_strategy, + int stream_idx) +{ + struct 
dml2_fams2_per_method_common_meta *stream_method_fams2_meta = NULL; + + switch (stream_pstate_strategy) { + case dml2_pmo_pstate_strategy_vactive: + case dml2_pmo_pstate_strategy_fw_vactive_drr: + stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vactive.common; + break; + case dml2_pmo_pstate_strategy_vblank: + case dml2_pmo_pstate_strategy_fw_vblank_drr: + stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vblank.common; + break; + case dml2_pmo_pstate_strategy_fw_svp: + case dml2_pmo_pstate_strategy_fw_svp_drr: + stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_subvp.common; + break; + case dml2_pmo_pstate_strategy_fw_drr: + stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_drr.common; + break; + case dml2_pmo_pstate_strategy_reserved_hw: + case dml2_pmo_pstate_strategy_reserved_fw: + case dml2_pmo_pstate_strategy_reserved_fw_drr_fixed: + case dml2_pmo_pstate_strategy_reserved_fw_drr_var: + case dml2_pmo_pstate_strategy_na: + default: + stream_method_fams2_meta = NULL; + } + + return stream_method_fams2_meta; +} + +static bool is_timing_group_schedulable( + struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS], + const unsigned int timing_group_idx, + struct dml2_fams2_per_method_common_meta *group_fams2_meta) +{ + unsigned int i; + struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta; + + unsigned int base_stream_idx = 0; + struct dml2_pmo_scratch *s = &pmo->scratch; + + /* find base stream idx */ + for (base_stream_idx = 0; base_stream_idx < display_cfg->display_config.num_streams; base_stream_idx++) { + if (is_bit_set_in_bitfield(s->pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], base_stream_idx)) { + /* master stream found */ + break; + } + } + + /* init allow start and end lines for timing group */ + stream_method_fams2_meta = get_per_method_common_meta(pmo, per_stream_pstate_strategy[base_stream_idx], base_stream_idx); + group_fams2_meta->allow_start_otg_vline = stream_method_fams2_meta->allow_start_otg_vline; + group_fams2_meta->allow_end_otg_vline = stream_method_fams2_meta->allow_end_otg_vline; + group_fams2_meta->period_us = stream_method_fams2_meta->period_us; + for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) { + if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { + stream_method_fams2_meta = get_per_method_common_meta(pmo, per_stream_pstate_strategy[i], i); + + if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) { + /* set group allow start to larger otg vline */ + group_fams2_meta->allow_start_otg_vline = stream_method_fams2_meta->allow_start_otg_vline; + } + + if (group_fams2_meta->allow_end_otg_vline > stream_method_fams2_meta->allow_end_otg_vline) { + /* set group allow end to smaller otg vline */ + group_fams2_meta->allow_end_otg_vline = stream_method_fams2_meta->allow_end_otg_vline; + } + + /* check waveform still has positive width */ + if (group_fams2_meta->allow_start_otg_vline >= group_fams2_meta->allow_end_otg_vline) { + /* timing group is not schedulable */ + return false; + } + } + } + + /* calculate the rest of the meta */ + build_method_scheduling_params(group_fams2_meta, &pmo->scratch.pmo_dcn4.stream_fams2_meta[base_stream_idx]); + + 
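/*
 * Editor's illustrative note, not part of the original patch: the loop above
 * intersects the per-stream allow windows of the timing group. For example,
 * if one stream allows p-state switching on OTG vlines [100, 1000] and another
 * on [200, 900], the group window becomes [200, 900]; an empty intersection
 * (start >= end) means the group is not schedulable.
 */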
return true; +} + +static bool is_config_schedulable( + struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS]) +{ + unsigned int i, j; + bool schedulable; + struct dml2_pmo_scratch *s = &pmo->scratch; + + memset(s->pmo_dcn4.group_common_fams2_meta, 0, sizeof(s->pmo_dcn4.group_common_fams2_meta)); + memset(s->pmo_dcn4.sorted_group_gtl_disallow_index, 0, sizeof(unsigned int) * DML2_MAX_PLANES); + + /* search for a general solution to the schedule */ + + /* STAGE 0: Early return for special cases */ + if (display_cfg->display_config.num_streams <= 1) { + /* single stream is always schedulable */ + return true; + } + + /* STAGE 1: confirm allow waves overlap for synchronizable streams */ + schedulable = true; + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + s->pmo_dcn4.sorted_group_gtl_disallow_index[i] = i; + s->pmo_dcn4.sorted_group_gtl_period_index[i] = i; + if (!is_timing_group_schedulable(pmo, display_cfg, per_stream_pstate_strategy, i, &s->pmo_dcn4.group_common_fams2_meta[i])) { + /* synchronized timing group was not schedulable */ + schedulable = false; + break; + } + } + + if ((schedulable && s->pmo_dcn4.num_timing_groups <= 1) || !schedulable) { + /* 1. the only timing group was schedulable, so early pass + * 2. one of the timing groups was not schedulable, so early fail */ + return schedulable; + } + + /* STAGE 2: Check allow can't be masked entirely by other disallows */ + schedulable = true; + + /* sort disallow times from greatest to least */ + unsigned int temp; + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + bool swapped = false; + + for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) { + double j_disallow_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j]].disallow_time_us; + double jp1_disallow_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]].disallow_time_us; + if (j_disallow_us < jp1_disallow_us) { + /* swap as A < B */ + temp = s->pmo_dcn4.sorted_group_gtl_disallow_index[j]; + s->pmo_dcn4.sorted_group_gtl_disallow_index[j] = s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]; + s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1] = temp; + swapped = true; + } + } + + /* sorted, exit early */ + if (!swapped) + break; + } + + /* Check worst case disallow region occurs in the middle of allow for the + * other display, or when >2 streams continue to halve the remaining allow time. 
+ */ + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + if (s->pmo_dcn4.group_common_fams2_meta[i].disallow_time_us <= 0.0) { + /* this timing group always allows */ + continue; + } + + double max_allow_time_us = s->pmo_dcn4.group_common_fams2_meta[i].allow_time_us; + for (j = 0; j < s->pmo_dcn4.num_timing_groups; j++) { + unsigned int sorted_j = s->pmo_dcn4.sorted_group_gtl_disallow_index[j]; + /* stream can't overlap itself */ + if (i != sorted_j && s->pmo_dcn4.group_common_fams2_meta[sorted_j].disallow_time_us > 0.0) { + max_allow_time_us = math_min2( + s->pmo_dcn4.group_common_fams2_meta[sorted_j].allow_time_us, + (max_allow_time_us - s->pmo_dcn4.group_common_fams2_meta[sorted_j].disallow_time_us) / 2); + + if (max_allow_time_us < 0.0) { + /* failed exit early */ + break; + } + } + } + + if (max_allow_time_us <= 0.0) { + /* not enough time for microschedule in the worst case */ + schedulable = false; + break; + } + } + + if (schedulable) { + return true; + } + + /* STAGE 3: check larger allow can fit period of all other streams */ + schedulable = true; + + /* sort periods from greatest to least */ + for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { + bool swapped = false; + + for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) { + double j_period_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j]].period_us; + double jp1_period_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]].period_us; + if (j_period_us < jp1_period_us) { + /* swap as A < B */ + temp = s->pmo_dcn4.sorted_group_gtl_period_index[j]; + s->pmo_dcn4.sorted_group_gtl_period_index[j] = s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]; + s->pmo_dcn4.sorted_group_gtl_period_index[j + 1] = temp; + swapped = true; + } + } + + /* sorted, exit early */ + if (!swapped) + break; + } + + /* check larger allow can fit period of all other streams */ + for (i = 0; i < s->pmo_dcn4.num_timing_groups - 1; i++) { + unsigned int sorted_i = s->pmo_dcn4.sorted_group_gtl_period_index[i]; + unsigned int sorted_ip1 = s->pmo_dcn4.sorted_group_gtl_period_index[i + 1]; + if (s->pmo_dcn4.group_common_fams2_meta[sorted_i].allow_time_us < s->pmo_dcn4.group_common_fams2_meta[sorted_ip1].period_us || + s->pmo_dcn4.group_is_drr_enabled[sorted_ip1]) { + schedulable = false; + break; + } + } + + /* STAGE 4: For similar frequencies, and when using HW exclusive modes, check disallow alignments are within allowed threshold */ + if (s->pmo_dcn4.num_timing_groups == 2 && + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, per_stream_pstate_strategy[0]) && + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, per_stream_pstate_strategy[1])) { + double period_delta = s->pmo_dcn4.group_common_fams2_meta[0].period_us - s->pmo_dcn4.group_common_fams2_meta[1].period_us; + + /* default period_0 > period_1 */ + unsigned int lrg_idx = 0; + unsigned int sml_idx = 1; + if (period_delta < 0.0) { + /* period_0 < period_1 */ + lrg_idx = 1; + sml_idx = 0; + period_delta = math_fabs(period_delta); + } + + if (s->pmo_dcn4.group_common_fams2_meta[lrg_idx].disallow_time_us >= s->pmo_dcn4.group_common_fams2_meta[sml_idx].allow_time_us) { + double time_until_disallow_us = (s->pmo_dcn4.group_common_fams2_meta[lrg_idx].allow_time_us + + s->pmo_dcn4.group_common_fams2_meta[sml_idx].allow_time_us) / + period_delta * + s->pmo_dcn4.group_common_fams2_meta[sml_idx].period_us; + double time_until_allow_us = (s->pmo_dcn4.group_common_fams2_meta[lrg_idx].disallow_time_us - + 
s->pmo_dcn4.group_common_fams2_meta[sml_idx].allow_time_us) / + period_delta * + s->pmo_dcn4.group_common_fams2_meta[sml_idx].period_us; + + if (time_until_disallow_us > PMO_DCN4_MIN_TIME_TO_DISALLOW_MS && + time_until_allow_us < pmo->ip_caps->fams2.max_allow_delay_us) { + schedulable = true; + } + } else { + /* if the allow is not maskable, it is always schedulable within a frame */ + schedulable = true; + } + } + + return schedulable; +} + +static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const enum dml2_pmo_pstate_strategy stream_pstate_strategy, + unsigned int stream_index) +{ + const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index]; + bool strategy_matches_drr_requirements = true; + + /* check if strategy is compatible with stream drr capability and strategy */ + if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_strategy) && + display_cfg->display_config.num_streams > 1 && + stream_descriptor->timing.drr_config.enabled && + (stream_descriptor->timing.drr_config.drr_active_fixed || stream_descriptor->timing.drr_config.drr_active_variable)) { + /* DRR is active, so config may become unschedulable */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_strategy) && + is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_strategy) && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable) { + /* DRR is variable, fw exclusive methods require DRR to be fixed */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_strategy) && + pmo->options->disable_drr_var_when_var_active && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable) { + /* DRR variable is active, but policy blocks DRR for p-state when this happens */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_strategy) && + (pmo->options->disable_drr_var || + !stream_descriptor->timing.drr_config.enabled || + stream_descriptor->timing.drr_config.disallowed)) { + /* DRR variable strategies are disallowed due to settings or policy */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_DRR_FIXED_STRATEGY_MASK, stream_pstate_strategy) && + (pmo->options->disable_drr_fixed || + (stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable))) { + /* DRR fixed strategies are disallowed due to settings or policy */ + strategy_matches_drr_requirements = false; + } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_strategy) && + pmo->options->disable_fams2) { + /* FW modes require FAMS2 */ + strategy_matches_drr_requirements = false; + } + + return strategy_matches_drr_requirements; +} + +static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, + const struct display_configuation_with_meta *display_cfg, + const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS]) +{ + struct dml2_pmo_scratch *s = &pmo->scratch; + + unsigned char stream_index = 0; + + unsigned int svp_count = 0; + unsigned int svp_stream_mask = 0; + unsigned int drr_count = 0; + unsigned int drr_stream_mask = 0; + unsigned int vactive_count = 0; + 
unsigned int vactive_stream_mask = 0; + unsigned int vblank_count = 0; + unsigned int vblank_stream_mask = 0; + + bool strategy_matches_forced_requirements = true; + bool strategy_matches_drr_requirements = true; + + // Tabulate everything + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + + if (!all_planes_match_strategy(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], + per_stream_pstate_strategy[stream_index])) { + strategy_matches_forced_requirements = false; + break; + } + + strategy_matches_drr_requirements = + stream_matches_drr_policy(pmo, display_cfg, per_stream_pstate_strategy[stream_index], stream_index); + + if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp || + per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + svp_count++; + set_bit_in_bitfield(&svp_stream_mask, stream_index); + } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + drr_count++; + set_bit_in_bitfield(&drr_stream_mask, stream_index); + } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vactive || + per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + vactive_count++; + set_bit_in_bitfield(&vactive_stream_mask, stream_index); + } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank || + per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + vblank_count++; + set_bit_in_bitfield(&vblank_stream_mask, stream_index); + } + } + + if (!strategy_matches_forced_requirements || !strategy_matches_drr_requirements) + return false; + + if (vactive_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask))) + return false; + + if (vblank_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vblank(pmo, display_cfg, vblank_stream_mask))) + return false; + + if (drr_count > 0 && (pmo->options->disable_drr_var || !all_timings_support_drr(pmo, display_cfg, drr_stream_mask))) + return false; + + if (svp_count > 0 && (pmo->options->disable_svp || !all_timings_support_svp(pmo, display_cfg, svp_stream_mask))) + return false; + + return is_config_schedulable(pmo, display_cfg, per_stream_pstate_strategy); +} + +static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) +{ + unsigned char i; + int min_vactive_margin_us = 0xFFFFFFF; + + for (i = 0; i < DML2_MAX_PLANES; i++) { + if (is_bit_set_in_bitfield(plane_mask, i)) { + if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active < min_vactive_margin_us) + min_vactive_margin_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active; + } + } + + return min_vactive_margin_us; +} + +static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, + struct display_configuation_with_meta *display_config, + int stream_index) +{ + const struct dml2_ip_capabilities *ip_caps = pmo->ip_caps; + const struct dml2_stream_parameters *stream_descriptor = &display_config->display_config.stream_descriptors[stream_index]; + const struct core_stream_support_info *stream_info = &display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index]; + const struct dml2_timing_cfg *timing = &stream_descriptor->timing; + struct dml2_fams2_meta 
*stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_index]; + + /* worst case all other streams require some programming at the same time, 0 if only 1 stream */ + unsigned int contention_delay_us = (ip_caps->fams2.vertical_interrupt_ack_delay_us + + (unsigned int)math_max3(ip_caps->fams2.subvp_programming_delay_us, ip_caps->fams2.drr_programming_delay_us, ip_caps->fams2.allow_programming_delay_us)) * + (display_config->display_config.num_streams - 1); + + /* common */ + stream_fams2_meta->valid = true; + stream_fams2_meta->otg_vline_time_us = (double)timing->h_total / timing->pixel_clock_khz * 1000.0; + stream_fams2_meta->nom_vtotal = stream_descriptor->timing.vblank_nom + stream_descriptor->timing.v_active; + stream_fams2_meta->nom_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / + (stream_fams2_meta->nom_vtotal * timing->h_total); + stream_fams2_meta->nom_frame_time_us = + (double)stream_fams2_meta->nom_vtotal * stream_fams2_meta->otg_vline_time_us; + + if (stream_descriptor->timing.drr_config.enabled == true) { + if (stream_descriptor->timing.drr_config.min_refresh_uhz != 0.0) { + stream_fams2_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / + ((double)stream_descriptor->timing.drr_config.min_refresh_uhz * stream_descriptor->timing.h_total) * 1e9); + } else { + /* assume min of 48Hz */ + stream_fams2_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz / + (48000000.0 * stream_descriptor->timing.h_total) * 1e9); + } + } else { + stream_fams2_meta->max_vtotal = stream_fams2_meta->nom_vtotal; + } + stream_fams2_meta->min_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 / + (stream_fams2_meta->max_vtotal * timing->h_total); + stream_fams2_meta->max_frame_time_us = + (double)stream_fams2_meta->max_vtotal * stream_fams2_meta->otg_vline_time_us; + + stream_fams2_meta->scheduling_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.scheduling_delay_us / stream_fams2_meta->otg_vline_time_us); + stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.vertical_interrupt_ack_delay_us / stream_fams2_meta->otg_vline_time_us); + stream_fams2_meta->contention_delay_otg_vlines = + (unsigned int)math_ceil(contention_delay_us / stream_fams2_meta->otg_vline_time_us); + /* worst case allow to target needs to account for all streams' allow events overlapping, and 1 line for error */ + stream_fams2_meta->allow_to_target_delay_otg_vlines = + (unsigned int)(math_ceil((ip_caps->fams2.vertical_interrupt_ack_delay_us + contention_delay_us + ip_caps->fams2.allow_programming_delay_us) / stream_fams2_meta->otg_vline_time_us)) + 1; + stream_fams2_meta->min_allow_width_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.min_allow_width_us / stream_fams2_meta->otg_vline_time_us); + /* this value should account for urgent latency */ + stream_fams2_meta->dram_clk_change_blackout_otg_vlines = + (unsigned int)math_ceil(display_config->mode_support_result.cfg_support_info.clean_me_up.support_info.watermarks.DRAMClockChangeWatermark / + stream_fams2_meta->otg_vline_time_us); + + /* scheduling params should be built based on the worst case for allow_time:disallow_time */ + + /* vactive */ + stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->max_vactive_det_fill_delay_us / stream_fams2_meta->otg_vline_time_us); + stream_fams2_meta->method_vactive.common.allow_start_otg_vline = + timing->v_blank_end + 
stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; + stream_fams2_meta->method_vactive.common.allow_end_otg_vline = + timing->v_blank_end + timing->v_active - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; + stream_fams2_meta->method_vactive.common.period_us = stream_fams2_meta->nom_frame_time_us; + build_method_scheduling_params(&stream_fams2_meta->method_vactive.common, stream_fams2_meta); + + /* vblank */ + stream_fams2_meta->method_vblank.common.allow_start_otg_vline = + timing->v_blank_end + timing->v_active; + stream_fams2_meta->method_vblank.common.allow_end_otg_vline = + stream_fams2_meta->method_vblank.common.allow_start_otg_vline + 1; + stream_fams2_meta->method_vblank.common.period_us = stream_fams2_meta->nom_frame_time_us; + build_method_scheduling_params(&stream_fams2_meta->method_vblank.common, stream_fams2_meta); + + /* subvp */ + stream_fams2_meta->method_subvp.programming_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_programming_delay_us / stream_fams2_meta->otg_vline_time_us); + stream_fams2_meta->method_subvp.df_throttle_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_df_throttle_delay_us / stream_fams2_meta->otg_vline_time_us); + stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.subvp_prefetch_to_mall_delay_us / stream_fams2_meta->otg_vline_time_us); + stream_fams2_meta->method_subvp.phantom_vactive = + stream_fams2_meta->allow_to_target_delay_otg_vlines + + stream_fams2_meta->min_allow_width_otg_vlines + + stream_info->phantom_min_v_active; + stream_fams2_meta->method_subvp.phantom_vfp = + stream_fams2_meta->method_subvp.df_throttle_delay_otg_vlines; + /* phantom vtotal = v_bp(vstartup) + v_sync(1) + v_fp(throttle_delay) + v_active(allow_to_target + min_allow + min_vactive)*/ + stream_fams2_meta->method_subvp.phantom_vtotal = + stream_info->phantom_v_startup + + stream_fams2_meta->method_subvp.phantom_vfp + + 1 + + stream_fams2_meta->method_subvp.df_throttle_delay_otg_vlines + + stream_fams2_meta->method_subvp.phantom_vactive; + stream_fams2_meta->method_subvp.common.allow_start_otg_vline = + stream_descriptor->timing.v_blank_end + + stream_fams2_meta->contention_delay_otg_vlines + + stream_fams2_meta->method_subvp.programming_delay_otg_vlines + + stream_fams2_meta->method_subvp.phantom_vtotal + + stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + + stream_fams2_meta->allow_to_target_delay_otg_vlines; + stream_fams2_meta->method_subvp.common.allow_end_otg_vline = + stream_fams2_meta->nom_vtotal - + timing->v_front_porch - + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; + stream_fams2_meta->method_subvp.common.period_us = stream_fams2_meta->nom_frame_time_us; + build_method_scheduling_params(&stream_fams2_meta->method_subvp.common, stream_fams2_meta); + + /* drr */ + stream_fams2_meta->method_drr.programming_delay_otg_vlines = + (unsigned int)math_ceil(ip_caps->fams2.drr_programming_delay_us / stream_fams2_meta->otg_vline_time_us); + stream_fams2_meta->method_drr.common.allow_start_otg_vline = + stream_fams2_meta->nom_vtotal + + stream_fams2_meta->allow_to_target_delay_otg_vlines; + stream_fams2_meta->method_drr.common.period_us = stream_fams2_meta->nom_frame_time_us; + if (display_config->display_config.num_streams <= 1) { + /* only need to stretch vblank for blackout time */ + stream_fams2_meta->method_drr.stretched_vtotal = + stream_fams2_meta->method_drr.common.allow_start_otg_vline + + 
stream_fams2_meta->min_allow_width_otg_vlines + + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; + } else { + /* multi display needs to always be schedulable */ + stream_fams2_meta->method_drr.stretched_vtotal = + stream_fams2_meta->method_drr.common.allow_start_otg_vline + + stream_fams2_meta->nom_vtotal + + stream_fams2_meta->min_allow_width_otg_vlines + + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; + } + stream_fams2_meta->method_drr.common.allow_end_otg_vline = + stream_fams2_meta->method_drr.stretched_vtotal - + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; + build_method_scheduling_params(&stream_fams2_meta->method_drr.common, stream_fams2_meta); +} + +static void build_subvp_meta_per_stream(struct dml2_pmo_instance *pmo, + struct display_configuation_with_meta *display_config, + int stream_index) +{ + struct dml2_implicit_svp_meta *stream_svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; + struct dml2_fams2_meta *stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_index]; + + stream_svp_meta->valid = true; + + /* PMO FAMS2 precaulcates these values */ + stream_svp_meta->v_active = stream_fams2_meta->method_subvp.phantom_vactive; + stream_svp_meta->v_front_porch = stream_fams2_meta->method_subvp.phantom_vfp; + stream_svp_meta->v_total = stream_fams2_meta->method_subvp.phantom_vtotal; +} + +bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) +{ + struct dml2_pmo_instance *pmo = in_out->instance; + struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; + struct dml2_pmo_scratch *s = &pmo->scratch; + + struct display_configuation_with_meta *display_config; + const struct dml2_plane_parameters *plane_descriptor; + const enum dml2_pmo_pstate_strategy(*strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + unsigned int strategy_list_size = 0; + unsigned int plane_index, stream_index, i; + + state->performed = true; + in_out->base_display_config->stage3.min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency; + + display_config = in_out->base_display_config; + display_config->display_config.overrides.enable_subvp_implicit_pmo = true; + + memset(s, 0, sizeof(struct dml2_pmo_scratch)); + + pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + pmo->scratch.pmo_dcn4.max_latency_index = pmo->min_clock_table_size; + pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; + + // First build the stream plane mask (array of bitfields indexed by stream, indicating plane mapping) + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + plane_descriptor = &display_config->display_config.plane_descriptors[plane_index]; + + set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index); + + state->pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; + } + + // Figure out which streams can do vactive, and also build up implicit SVP and FAMS2 meta + for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { + if (get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= (int)(MIN_VACTIVE_MARGIN_PCT * pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us)) + set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index); + + /* FAMS2 meta */ + 
build_fams2_meta_per_stream(pmo, display_config, stream_index); + + /* SVP meta */ + build_subvp_meta_per_stream(pmo, display_config, stream_index); + } + + /* get synchronized timing groups */ + build_synchronized_timing_groups(pmo, display_config); + + strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); + strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); + + if (strategy_list_size == 0) + return false; + + s->pmo_dcn4.num_pstate_candidates = 0; + + for (i = 0; i < strategy_list_size && s->pmo_dcn4.num_pstate_candidates < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { + if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, strategy_list[i])) { + insert_into_candidate_list(strategy_list[i], display_config->display_config.num_streams, s); + } + } + + if (s->pmo_dcn4.num_pstate_candidates > 0) { + // There's this funny case... + // If the first entry in the candidate list is all vactive, then we can consider it "tested", so the current index is 0 + // Otherwise the current index should be -1 because we run the optimization at least once + s->pmo_dcn4.cur_pstate_candidate = 0; + for (i = 0; i < display_config->display_config.num_streams; i++) { + if (s->pmo_dcn4.per_stream_pstate_strategy[0][i] != dml2_pmo_pstate_strategy_vactive) { + s->pmo_dcn4.cur_pstate_candidate = -1; + break; + } + } + return true; + } else { + return false; + } +} + +static void reset_display_configuration(struct display_configuation_with_meta *display_config) +{ + unsigned int plane_index; + unsigned int stream_index; + struct dml2_plane_parameters *plane; + + for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { + display_config->stage3.stream_svp_meta[stream_index].valid = false; + + display_config->display_config.stream_descriptors[stream_index].overrides.minimize_active_latency_hiding = false; + display_config->display_config.overrides.best_effort_min_active_latency_hiding_us = 0; + } + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + // Unset SubVP + plane->overrides.legacy_svp_config = dml2_svp_mode_override_auto; + + // Remove reserve time + plane->overrides.reserved_vblank_time_ns = 0; + + // Reset strategy to auto + plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_not_supported; + } +} + +static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + struct dml2_plane_parameters *plane; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr; + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_drr; + + } + } +} + +static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + struct dml2_pmo_scratch *scratch = &pmo->scratch; + + unsigned int plane_index; + int stream_index 
= -1; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom; + } + } + + if (stream_index >= 0) { + memcpy(&display_config->stage3.stream_svp_meta[stream_index], + &scratch->pmo_dcn4.stream_svp_meta[stream_index], + sizeof(struct dml2_implicit_svp_meta)); + } +} + +static void setup_planes_for_svp_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + struct dml2_pmo_scratch *scratch = &pmo->scratch; + + unsigned char plane_index; + int stream_index = -1; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom_drr; + } + } + + if (stream_index >= 0) { + memcpy(&display_config->stage3.stream_svp_meta[stream_index], + &scratch->pmo_dcn4.stream_svp_meta[stream_index], + sizeof(struct dml2_implicit_svp_meta)); + } +} + +static void setup_planes_for_vblank_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + struct dml2_plane_parameters *plane; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + + plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; + + } + } +} + +static void setup_planes_for_vblank_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned char plane_index; + struct dml2_plane_parameters *plane; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + plane = &display_config->display_config.plane_descriptors[plane_index]; + plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); + + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_vblank_drr; + } + } +} + +static void setup_planes_for_vactive_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned int plane_index; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; + } + } +} + +static void setup_planes_for_vactive_drr_by_mask(struct display_configuation_with_meta *display_config, + struct dml2_pmo_instance *pmo, + int plane_mask) +{ + unsigned char plane_index; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; 
plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_vactive_drr; + } + } +} + +static bool setup_display_config(struct display_configuation_with_meta *display_config, struct dml2_pmo_instance *pmo, int strategy_index) +{ + struct dml2_pmo_scratch *scratch = &pmo->scratch; + + bool fams2_required = false; + bool success = true; + unsigned int stream_index; + + reset_display_configuration(display_config); + + for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { + + if (pmo->scratch.pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_na) { + success = false; + break; + } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vactive) { + setup_planes_for_vactive_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vblank) { + setup_planes_for_vblank_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { + fams2_required = true; + setup_planes_for_svp_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + fams2_required = true; + setup_planes_for_vactive_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + fams2_required = true; + setup_planes_for_vblank_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + fams2_required = true; + setup_planes_for_svp_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + fams2_required = true; + setup_planes_for_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); + } + } + + /* copy FAMS2 meta */ + if (fams2_required) { + display_config->stage3.fams2_required = fams2_required; + memcpy(&display_config->stage3.stream_fams2_meta, + &scratch->pmo_dcn4.stream_fams2_meta, + sizeof(struct dml2_fams2_meta) * DML2_MAX_PLANES); + } + + return success; +} + +static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) +{ + int min_time_us = 0xFFFFFF; + unsigned int plane_index = 0; + + for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { + if (is_bit_set_in_bitfield(plane_mask, plane_index)) { + if (min_time_us > (display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000)) + min_time_us = display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; + } + } + return min_time_us; +} + +bool pmo_dcn4_fams2_test_for_pstate_support(struct 
dml2_pmo_test_for_pstate_support_in_out *in_out) +{ + bool p_state_supported = true; + unsigned int stream_index; + struct dml2_pmo_scratch *s = &in_out->instance->scratch; + + int MIN_VACTIVE_MARGIN_VBLANK = 0; + int MIN_VACTIVE_MARGIN_DRR = 0; + int REQUIRED_RESERVED_TIME = 0; + + MIN_VACTIVE_MARGIN_VBLANK = INT_MIN; + MIN_VACTIVE_MARGIN_DRR = INT_MIN; + REQUIRED_RESERVED_TIME = (int)in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; + + if (s->pmo_dcn4.cur_pstate_candidate < 0) + return false; + + for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { + + if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vactive || + s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us)) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vblank || + s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < + REQUIRED_RESERVED_TIME || + get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp || + s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + if (!all_planes_match_strategy(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) || + get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { + p_state_supported = false; + break; + } + } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_na) { + p_state_supported = false; + break; + } + } + + return p_state_supported; +} + +bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) +{ + bool success = false; + struct dml2_pmo_scratch *s = &in_out->instance->scratch; + + memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); + + if (in_out->last_candidate_failed) { + if (s->pmo_dcn4.allow_state_increase_for_strategy[s->pmo_dcn4.cur_pstate_candidate] && + s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index) { + s->pmo_dcn4.cur_latency_index++; + + success = true; + } + } + + if (!success) { + s->pmo_dcn4.cur_latency_index = s->pmo_dcn4.min_latency_index; + s->pmo_dcn4.cur_pstate_candidate++; + + if 
(s->pmo_dcn4.cur_pstate_candidate < s->pmo_dcn4.num_pstate_candidates) { + success = true; + } + } + + if (success) { + in_out->optimized_display_config->stage3.min_clk_index_for_latency = s->pmo_dcn4.cur_latency_index; + setup_display_config(in_out->optimized_display_config, in_out->instance, in_out->instance->scratch.pmo_dcn4.cur_pstate_candidate); + } + + return success; +} + +bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out) +{ + bool success = true; + struct dml2_pmo_instance *pmo = in_out->instance; + bool stutter_period_meets_z8_eco = true; + bool z8_stutter_optimization_too_expensive = false; + double line_time_us, vblank_nom_time_us; + + unsigned int i; + + if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && + pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && + pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us < pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) + return false; // Unexpected SoCBB setup + + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { + if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[i].active_latency_hiding_us < + pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us + pmo->soc_bb->power_management_parameters.z8_min_idle_time) { + stutter_period_meets_z8_eco = false; + break; + } + } + + for (i = 0; i < in_out->base_display_config->display_config.num_streams; i++) { + line_time_us = (double)in_out->base_display_config->display_config.stream_descriptors[i].timing.h_total / (in_out->base_display_config->display_config.stream_descriptors[i].timing.pixel_clock_khz * 1000) * 1000000; + vblank_nom_time_us = line_time_us * in_out->base_display_config->display_config.stream_descriptors[i].timing.vblank_nom; + + if (vblank_nom_time_us < pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) { + z8_stutter_optimization_too_expensive = true; + break; + } + } + + pmo->scratch.pmo_dcn4.num_stutter_candidates = 0; + pmo->scratch.pmo_dcn4.cur_stutter_candidate = 0; + + if (stutter_period_meets_z8_eco && !z8_stutter_optimization_too_expensive) { + if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0) { + pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.num_stutter_candidates] = (unsigned int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us; + pmo->scratch.pmo_dcn4.num_stutter_candidates++; + pmo->scratch.pmo_dcn4.z8_vblank_optimizable = true; + } + } else { + pmo->scratch.pmo_dcn4.z8_vblank_optimizable = false; + } + + if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0) { + pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.num_stutter_candidates] = (unsigned int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us; + pmo->scratch.pmo_dcn4.num_stutter_candidates++; + } + + if (pmo->scratch.pmo_dcn4.num_stutter_candidates == 0) + success = false; + + return success; +} + +bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out) +{ + bool success = true; + struct dml2_pmo_instance *pmo = in_out->instance; + + unsigned int i; + + for (i = 0; i < in_out->base_display_config->display_config.num_streams; i++) { + if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && + pmo->scratch.pmo_dcn4.z8_vblank_optimizable && + 
in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) { + success = false; + break; + } + if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && + in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) { + success = false; + break; + } + } + + return success; +} + +bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out) +{ + bool success = false; + struct dml2_pmo_instance *pmo = in_out->instance; + unsigned int i; + + memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); + + if (!in_out->last_candidate_failed) { + if (pmo->scratch.pmo_dcn4.cur_stutter_candidate < pmo->scratch.pmo_dcn4.num_stutter_candidates) { + for (i = 0; i < in_out->optimized_display_config->display_config.num_streams; i++) { + in_out->optimized_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us = pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate]; + } + + success = true; + } + } + + return success; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h new file mode 100644 index 000000000000..75175d93add4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML2_PMO_FAMS2_DCN4_H__ +#define __DML2_PMO_FAMS2_DCN4_H__ + +#include "dml2_internal_shared_types.h" + +bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out); + +bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); + +bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); +bool pmo_dcn4_fams2_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); +bool pmo_dcn4_fams2_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); + +bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); +bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); +bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); + +bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out); +bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out); +bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c new file mode 100644 index 000000000000..a34506a78c50 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ + +#include "dml2_pmo_factory.h" +#include "dml2_pmo_dcn4_fams2.h" +#include "dml2_pmo_dcn4.h" +#include "dml2_pmo_dcn3.h" +#include "dml2_external_lib_deps.h" + +static bool dummy_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out) +{ + return false; +} + +static bool dummy_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out) +{ + return true; +} + +static bool dummy_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out) +{ + return false; +} + +bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *out) +{ + bool result = false; + + if (out == 0) + return false; + + memset(out, 0, sizeof(struct dml2_pmo_instance)); + + switch (project_id) { + case dml2_project_dcn4x_stage1: + out->initialize = pmo_dcn4_initialize; + out->optimize_dcc_mcache = pmo_dcn4_optimize_dcc_mcache; + result = true; + break; + case dml2_project_dcn4x_stage2: + out->initialize = pmo_dcn3_initialize; + + out->optimize_dcc_mcache = pmo_dcn3_optimize_dcc_mcache; + + out->init_for_vmin = pmo_dcn3_init_for_vmin; + out->test_for_vmin = pmo_dcn3_test_for_vmin; + out->optimize_for_vmin = pmo_dcn3_optimize_for_vmin; + + out->init_for_uclk_pstate = pmo_dcn3_init_for_pstate_support; + out->test_for_uclk_pstate = pmo_dcn3_test_for_pstate_support; + out->optimize_for_uclk_pstate = pmo_dcn3_optimize_for_pstate_support; + + out->init_for_stutter = dummy_init_for_stutter; + out->test_for_stutter = dummy_test_for_stutter; + out->optimize_for_stutter = dummy_optimize_for_stutter; + + result = true; + break; + case dml2_project_dcn4x_stage2_auto_drr_svp: + out->initialize = pmo_dcn4_fams2_initialize; + + out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache; + + out->init_for_vmin = pmo_dcn4_fams2_init_for_vmin; + out->test_for_vmin = pmo_dcn4_fams2_test_for_vmin; + out->optimize_for_vmin = pmo_dcn4_fams2_optimize_for_vmin; + + out->init_for_uclk_pstate = pmo_dcn4_fams2_init_for_pstate_support; + out->test_for_uclk_pstate = pmo_dcn4_fams2_test_for_pstate_support; + out->optimize_for_uclk_pstate = pmo_dcn4_fams2_optimize_for_pstate_support; + + out->init_for_stutter = pmo_dcn4_fams2_init_for_stutter; + out->test_for_stutter = pmo_dcn4_fams2_test_for_stutter; + out->optimize_for_stutter = pmo_dcn4_fams2_optimize_for_stutter; + + result = true; + break; + case dml2_project_invalid: + default: + break; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h new file mode 100644 index 000000000000..0cdf4f4ccfc0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML2_PMO_FACTORY_H__ +#define __DML2_PMO_FACTORY_H__ + +#include "dml2_internal_shared_types.h" +#include "dml_top_types.h" + +bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *out); + +#endif
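Editor's note on the block above: dml2_pmo_create() is a plain function-pointer factory. It zeroes a table of init/test/optimize hooks and then fills it according to the project ID, so the top level can drive the stage-1, stage-2 and FAMS2-capable PMOs through one interface, with dummy callbacks wired in where a project opts out of an optimization. Below is a minimal standalone sketch of that pattern; the enum, struct and function names are hypothetical stand-ins, not the real dml2 types.

#include <stdbool.h>
#include <string.h>

/* hypothetical stand-ins for the dml2 project IDs and PMO function table */
enum demo_project_id {
	DEMO_PROJECT_BASIC,
	DEMO_PROJECT_FULL,
	DEMO_PROJECT_INVALID
};

struct demo_pmo_instance {
	bool (*init_for_stutter)(void *ctx);
	bool (*test_for_stutter)(void *ctx);
	bool (*optimize_for_stutter)(void *ctx);
};

/* dummies for a project that opts out, mirroring dummy_*_for_stutter above */
static bool stub_init(void *ctx)     { (void)ctx; return false; }
static bool stub_test(void *ctx)     { (void)ctx; return true;  }
static bool stub_optimize(void *ctx) { (void)ctx; return false; }

/* pretend "real" hooks for a fully featured project */
static bool full_init(void *ctx)     { (void)ctx; return true; }
static bool full_test(void *ctx)     { (void)ctx; return true; }
static bool full_optimize(void *ctx) { (void)ctx; return true; }

/* same shape as dml2_pmo_create(): zero the table, then pick hooks per project */
static bool demo_pmo_create(enum demo_project_id id, struct demo_pmo_instance *out)
{
	if (!out)
		return false;

	memset(out, 0, sizeof(*out));

	switch (id) {
	case DEMO_PROJECT_BASIC:
		out->init_for_stutter = stub_init;
		out->test_for_stutter = stub_test;
		out->optimize_for_stutter = stub_optimize;
		return true;
	case DEMO_PROJECT_FULL:
		out->init_for_stutter = full_init;
		out->test_for_stutter = full_test;
		out->optimize_for_stutter = full_optimize;
		return true;
	case DEMO_PROJECT_INVALID:
	default:
		return false;
	}
}

A caller would invoke demo_pmo_create() once at initialization and afterwards only go through the hooks, which matches how dml2_initialize_instance() later in this patch calls dml2_pmo_create() and then uses pmo_instance.initialize and friends.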
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c new file mode 100644 index 000000000000..178bb21bcdc0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "lib_float_math.h" + +#ifndef ASSERT +#define ASSERT(condition) +#endif + +#define isNaN(number) ((number) != (number)) + + /* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +double math_mod(const double arg1, const double arg2) +{ + if (isNaN(arg1)) + return arg2; + if (isNaN(arg2)) + return arg1; + return arg1 - arg1 * ((int)(arg1 / arg2)); +} + +double math_min2(const double arg1, const double arg2) +{ + if (isNaN(arg1)) + return arg2; + if (isNaN(arg2)) + return arg1; + return arg1 < arg2 ? arg1 : arg2; +} + +double math_max2(const double arg1, const double arg2) +{ + if (isNaN(arg1)) + return arg2; + if (isNaN(arg2)) + return arg1; + return arg1 > arg2 ? arg1 : arg2; +} + +double math_floor2(const double arg, const double significance) +{ + ASSERT(significance != 0); + + return ((int)(arg / significance)) * significance; +} + +double math_floor(const double arg) +{ + return ((int)(arg)); +} + +double math_ceil(const double arg) +{ + return (int)(arg + 0.99999); +} + +double math_ceil2(const double arg, const double significance) +{ + ASSERT(significance != 0); + + return ((int)(arg / significance + 0.99999)) * significance; +} + +double math_max3(double v1, double v2, double v3) +{ + return v3 > math_max2(v1, v2) ? v3 : math_max2(v1, v2); +} + +double math_max4(double v1, double v2, double v3, double v4) +{ + return v4 > math_max3(v1, v2, v3) ? v4 : math_max3(v1, v2, v3); +} + +double math_max5(double v1, double v2, double v3, double v4, double v5) +{ + return math_max3(v1, v2, v3) > math_max2(v4, v5) ? math_max3(v1, v2, v3) : math_max2(v4, v5); +} + +float math_pow(float a, float exp) +{ + double temp; + if ((int)exp == 0) + return 1; + temp = math_pow(a, (float)((int)(exp / 2))); + if (((int)exp % 2) == 0) { + return (float)(temp * temp); + } else { + if ((int)exp > 0) + return (float)(a * temp * temp); + else + return (float)((temp * temp) / a); + } +} + +double math_fabs(double a) +{ + if (a > 0) + return (a); + else + return (-a); +} + +float math_log(float a, float b) +{ + int *const exp_ptr = (int *)(&a); + int x = *exp_ptr; + const int log_2 = ((x >> 23) & 255) - 128; + x &= ~(255 << 23); + x += 127 << 23; + *exp_ptr = x; + + a = ((-1.0f / 3) * a + 2) * a - 2.0f / 3; + + if (b > 2.00001 || b < 1.99999) + return (a + log_2) / math_log(b, 2); + else + return (a + log_2); +} + +float math_log2(float a) +{ + return math_log(a, 2.0); +} + +double math_round(double a) +{ + const double round_pt = 0.5; + + return math_floor(a + round_pt); +}
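Editor's note on lib_float_math.c above: these helpers deliberately avoid libm, and math_ceil()/math_ceil2() round up by adding a 0.99999 bias before truncating to int, so an input that sits within roughly 1e-5 above an integer (or a magnitude too large for that truncation) will not round the way a true ceil() would; callers should keep the inputs modest and well separated, as the DML code appears to. A small userspace-style usage sketch, assuming lib_float_math.h is on the include path; the example values are made up.

#include <stdio.h>
#include "lib_float_math.h" /* helper library added by this patch */

int main(void)
{
	/* round up / down to an arbitrary significance, e.g. quarter steps */
	printf("ceil2(37.10, 0.25)  = %f\n", math_ceil2(37.10, 0.25));  /* 37.25 */
	printf("floor2(37.10, 0.25) = %f\n", math_floor2(37.10, 0.25)); /* 37.00 */

	/* plain ceil/floor built on integer truncation */
	printf("ceil(2.3)  = %f\n", math_ceil(2.3));   /* 3.0 */
	printf("floor(2.3) = %f\n", math_floor(2.3));  /* 2.0 */

	/* caveat of the 0.99999 bias: a value barely above an integer rounds down */
	printf("ceil(2.000001) = %f\n", math_ceil(2.000001)); /* 2.0, not 3.0 */

	return 0;
}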
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h new file mode 100644 index 000000000000..8f595b441dd0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __LIB_FLOAT_MATH_H__ +#define __LIB_FLOAT_MATH_H__ + +double math_mod(const double arg1, const double arg2); +double math_min2(const double arg1, const double arg2); +double math_max2(const double arg1, const double arg2); +double math_floor2(const double arg, const double significance); +double math_floor(const double arg); +double math_ceil(const double arg); +double math_ceil2(const double arg, const double significance); +double math_max3(double v1, double v2, double v3); +double math_max4(double v1, double v2, double v3, double v4); +double math_max5(double v1, double v2, double v3, double v4, double v5); +float math_pow(float a, float exp); +double math_fabs(double a); +float math_log(float a, float b); +float math_log2(float a); +double math_round(double a); + +#endif
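Editor's note: these declarations are what the FAMS2 metadata code earlier in this patch uses to turn time budgets in microseconds into OTG line counts (otg_vline_time_us and the various *_otg_vlines fields in build_fams2_meta_per_stream). A minimal sketch of that conversion follows; the timing numbers are invented for illustration and are not taken from the patch.

#include <stdio.h>
#include "lib_float_math.h" /* math_ceil() from the standalone library above */

int main(void)
{
	/* assumed example timing, not from the patch */
	const double h_total = 4400.0;            /* pixels per line, incl. blanking */
	const double pixel_clock_khz = 594000.0;  /* 594 MHz pixel clock */
	const double scheduling_delay_us = 125.0; /* made-up time budget */

	/* same expression as stream_fams2_meta->otg_vline_time_us */
	double otg_vline_time_us = h_total / pixel_clock_khz * 1000.0;

	/* same pattern as scheduling_delay_otg_vlines: round the budget up to whole lines */
	unsigned int vlines = (unsigned int)math_ceil(scheduling_delay_us / otg_vline_time_us);

	printf("vline time: %f us, budget: %u vlines\n", otg_vline_time_us, vlines);
	return 0;
}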
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c new file mode 100644 index 000000000000..1b6dbfaa7ae8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_top_optimization.h" +#include "dml2_internal_shared_types.h" +#include "dml_top_mcache.h" + +static void copy_display_configuration_with_meta(struct display_configuation_with_meta *dst, const struct display_configuation_with_meta *src) +{ + memcpy(dst, src, sizeof(struct display_configuation_with_meta)); +} + +bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; + + state->performed = true; + + return true; +} + +bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; + + return state->min_clk_index_for_latency == 0; +} + +bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params) +{ + bool result = false; + + if (params->display_config->stage1.min_clk_index_for_latency > 0) { + copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); + params->optimized_display_config->stage1.min_clk_index_for_latency--; + result = true; + } + + return result; +} + +bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + bool mcache_success = false; + bool result = false; + + memset(l, 0, sizeof(struct dml2_optimization_test_function_locals)); + + l->test_mcache.calc_mcache_count_params.dml2_instance = params->dml; + l->test_mcache.calc_mcache_count_params.display_config = ¶ms->display_config->display_config; + l->test_mcache.calc_mcache_count_params.mcache_allocations = params->display_config->stage2.mcache_allocations; + + result = dml2_top_mcache_calc_mcache_count_and_offsets(&l->test_mcache.calc_mcache_count_params); // use core to get the basic mcache_allocations + + if (result) { + l->test_mcache.assign_global_mcache_ids_params.allocations = params->display_config->stage2.mcache_allocations; + l->test_mcache.assign_global_mcache_ids_params.num_allocations = params->display_config->display_config.num_planes; + + dml2_top_mcache_assign_global_mcache_ids(&l->test_mcache.assign_global_mcache_ids_params); + + l->test_mcache.validate_admissibility_params.dml2_instance = params->dml; + l->test_mcache.validate_admissibility_params.display_cfg = ¶ms->display_config->display_config; + l->test_mcache.validate_admissibility_params.mcache_allocations = params->display_config->stage2.mcache_allocations; + l->test_mcache.validate_admissibility_params.cfg_support_info = ¶ms->display_config->mode_support_result.cfg_support_info; + + mcache_success = dml2_top_mcache_validate_admissability(&l->test_mcache.validate_admissibility_params); // also find the shift to make mcache allocation works + + memcpy(params->display_config->stage2.per_plane_mcache_support, l->test_mcache.validate_admissibility_params.per_plane_status, sizeof(bool) * 
DML2_MAX_PLANES); + } + + return mcache_success; +} + +bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + bool optimize_success = false; + + if (params->last_candidate_supported == false) + return false; + + copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); + + l->optimize_mcache.optimize_mcache_params.instance = ¶ms->dml->pmo_instance; + l->optimize_mcache.optimize_mcache_params.dcc_mcache_supported = params->display_config->stage2.per_plane_mcache_support; + l->optimize_mcache.optimize_mcache_params.display_config = ¶ms->display_config->display_config; + l->optimize_mcache.optimize_mcache_params.optimized_display_cfg = ¶ms->optimized_display_config->display_config; + l->optimize_mcache.optimize_mcache_params.cfg_support_info = ¶ms->optimized_display_config->mode_support_result.cfg_support_info; + + optimize_success = params->dml->pmo_instance.optimize_dcc_mcache(&l->optimize_mcache.optimize_mcache_params); + + return optimize_success; +} + +bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->vmin.init_params.instance = ¶ms->dml->pmo_instance; + l->vmin.init_params.base_display_config = params->display_config; + return params->dml->pmo_instance.init_for_vmin(&l->vmin.init_params); +} + +bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->test_vmin.pmo_test_vmin_params.instance = ¶ms->dml->pmo_instance; + l->test_vmin.pmo_test_vmin_params.display_config = params->display_config; + l->test_vmin.pmo_test_vmin_params.vmin_limits = ¶ms->dml->soc_bbox.vmin_limit; + return params->dml->pmo_instance.test_for_vmin(&l->test_vmin.pmo_test_vmin_params); +} + +bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + if (params->last_candidate_supported == false) + return false; + + l->optimize_vmin.pmo_optimize_vmin_params.instance = ¶ms->dml->pmo_instance; + l->optimize_vmin.pmo_optimize_vmin_params.base_display_config = params->display_config; + l->optimize_vmin.pmo_optimize_vmin_params.optimized_display_config = params->optimized_display_config; + return params->dml->pmo_instance.optimize_for_vmin(&l->optimize_vmin.pmo_optimize_vmin_params); +} + +bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) +{ + bool test_passed = false; + bool optimize_succeeded = true; + bool candidate_validation_passed = true; + struct optimization_init_function_params init_params = { 0 }; + struct optimization_test_function_params test_params = { 0 }; + struct optimization_optimize_function_params optimize_params = { 0 }; + + if (!params->dml || + !params->optimize_function || + !params->test_function || + !params->display_config || + !params->optimized_display_config) + return false; + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); + + init_params.locals = &l->init_function_locals; + init_params.dml = params->dml; + init_params.display_config = &l->cur_candidate_display_cfg; + + if (params->init_function && 
!params->init_function(&init_params)) + return false; + + test_params.locals = &l->test_function_locals; + test_params.dml = params->dml; + test_params.display_config = &l->cur_candidate_display_cfg; + + test_passed = params->test_function(&test_params); + + while (!test_passed && optimize_succeeded) { + memset(&optimize_params, 0, sizeof(struct optimization_optimize_function_params)); + + optimize_params.locals = &l->optimize_function_locals; + optimize_params.dml = params->dml; + optimize_params.display_config = &l->cur_candidate_display_cfg; + optimize_params.optimized_display_config = &l->next_candidate_display_cfg; + optimize_params.last_candidate_supported = candidate_validation_passed; + + optimize_succeeded = params->optimize_function(&optimize_params); + + if (optimize_succeeded) { + l->mode_support_params.instance = ¶ms->dml->core_instance; + l->mode_support_params.display_cfg = &l->next_candidate_display_cfg; + l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; + + if (l->next_candidate_display_cfg.stage3.performed) + l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage3.min_clk_index_for_latency; + else + l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage1.min_clk_index_for_latency; + + candidate_validation_passed = params->dml->core_instance.mode_support(&l->mode_support_params); + + l->next_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; + } + + if (optimize_succeeded && candidate_validation_passed) { + memset(&test_params, 0, sizeof(struct optimization_test_function_params)); + test_params.locals = &l->test_function_locals; + test_params.dml = params->dml; + test_params.display_config = &l->next_candidate_display_cfg; + test_passed = params->test_function(&test_params); + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, &l->next_candidate_display_cfg); + + // If optimization is not all or nothing, then store partial progress in output + if (!params->all_or_nothing) + copy_display_configuration_with_meta(params->optimized_display_config, &l->next_candidate_display_cfg); + } + } + + if (test_passed) + copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); + + return test_passed; +} + +bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) +{ + int highest_state, lowest_state, cur_state; + bool supported = false; + + if (!params->dml || + !params->optimize_function || + !params->test_function || + !params->display_config || + !params->optimized_display_config) + return false; + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); + highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency; + lowest_state = 0; + cur_state = 0; + + while (highest_state > lowest_state) { + cur_state = (highest_state + lowest_state) / 2; + + l->mode_support_params.instance = ¶ms->dml->core_instance; + l->mode_support_params.display_cfg = &l->cur_candidate_display_cfg; + l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; + l->mode_support_params.min_clk_index = cur_state; + + supported = params->dml->core_instance.mode_support(&l->mode_support_params); + + if (supported) { + l->cur_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; + highest_state = cur_state; + } else { + lowest_state = cur_state + 1; + } + } + 
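/* bisection converged: lowest_state is the smallest min-clock index that still passed mode support, assuming the starting index was supported and support is monotonic in the clock index */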
l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency = lowest_state; + + copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); + + return true; +} + +bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.init_params.base_display_config = params->display_config; + + return params->dml->pmo_instance.init_for_uclk_pstate(&l->uclk_pstate.init_params); +} + +bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->uclk_pstate.test_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.test_params.base_display_config = params->display_config; + + return params->dml->pmo_instance.test_for_uclk_pstate(&l->uclk_pstate.test_params); +} + +bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + l->uclk_pstate.optimize_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.optimize_params.base_display_config = params->display_config; + l->uclk_pstate.optimize_params.optimized_display_config = params->optimized_display_config; + l->uclk_pstate.optimize_params.last_candidate_failed = !params->last_candidate_supported; + + return params->dml->pmo_instance.optimize_for_uclk_pstate(&l->uclk_pstate.optimize_params); +} + +bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.init_params.base_display_config = params->display_config; + + return params->dml->pmo_instance.init_for_stutter(&l->stutter.stutter_params); +} + +bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; + l->stutter.stutter_params.base_display_config = params->display_config; + return params->dml->pmo_instance.test_for_stutter(&l->stutter.stutter_params); +} + +bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; + l->stutter.stutter_params.base_display_config = params->display_config; + l->stutter.stutter_params.optimized_display_config = params->optimized_display_config; + l->stutter.stutter_params.last_candidate_failed = !params->last_candidate_supported; + return params->dml->pmo_instance.optimize_for_stutter(&l->stutter.stutter_params); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h new file mode 100644 index 000000000000..1536afcbf73a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
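Editor's note: the per-target hooks in this file (mcache, vmin, uclk p-state, stutter) are all driven by dml2_top_optimization_perform_optimization_phase() above: test the current candidate and, while the test fails, ask the optimizer for the next candidate, re-run mode support on it, and test again. A stripped-down sketch of that control flow, using hypothetical callback types in place of the dml2 parameter structs:

#include <stdbool.h>

/* hypothetical stand-ins for the dml2 hooks; the real ones take parameter structs */
typedef bool (*demo_test_fn)(int candidate);
typedef bool (*demo_optimize_fn)(int candidate, bool last_supported, int *next);
typedef bool (*demo_support_fn)(int candidate);

static bool demo_run_phase(int candidate, demo_test_fn test,
			   demo_optimize_fn optimize, demo_support_fn mode_support,
			   int *result)
{
	bool passed = test(candidate);
	bool supported = true;

	while (!passed) {
		int next;

		/* optimizer proposes the next candidate, or gives up */
		if (!optimize(candidate, supported, &next))
			break;

		/* every new candidate must re-pass mode support before being tested */
		supported = mode_support(next);
		if (supported)
			passed = test(next);

		candidate = next; /* keep iterating from the latest proposal */
	}

	if (passed)
		*result = candidate;
	return passed;
}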
+ + +#ifndef __DML2_TOP_OPTIMIZATION_H__ +#define __DML2_TOP_OPTIMIZATION_H__ + +#include "dml2_external_lib_deps.h" +#include "dml2_internal_shared_types.h" + +bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params); +bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params); + +bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params); +bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params); +bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params); + +bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params); +bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params); + +bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params); +bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params); +bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params); + +bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params); +bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params); +bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params); + +bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params); +bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params); +bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c new file mode 100644 index 000000000000..5ba849aad9d0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
+ + +#include "dml2_internal_shared_types.h" +#include "dml_top.h" +#include "dml2_mcg_factory.h" +#include "dml2_core_factory.h" +#include "dml2_dpmm_factory.h" +#include "dml2_pmo_factory.h" +#include "dml_top_mcache.h" +#include "dml2_top_optimization.h" +#include "dml2_external_lib_deps.h" + +unsigned int dml2_get_instance_size_bytes(void) +{ + return sizeof(struct dml2_instance); +} + +bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_initialize_instance_locals *l = &dml->scratch.initialize_instance_locals; + struct dml2_core_initialize_in_out core_init_params = { 0 }; + struct dml2_mcg_build_min_clock_table_params_in_out mcg_build_min_clk_params = { 0 }; + struct dml2_pmo_initialize_in_out pmo_init_params = { 0 }; + bool result = false; + + memset(l, 0, sizeof(struct dml2_initialize_instance_locals)); + + memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); + memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb)); + + dml->project_id = in_out->options.project_id; + dml->pmo_options = in_out->options.pmo_options; + + // Initialize All Components + result = dml2_mcg_create(in_out->options.project_id, &dml->mcg_instance); + + if (result) + result = dml2_dpmm_create(in_out->options.project_id, &dml->dpmm_instance); + + if (result) + result = dml2_core_create(in_out->options.project_id, &dml->core_instance); + + if (result) { + mcg_build_min_clk_params.soc_bb = &in_out->soc_bb; + mcg_build_min_clk_params.min_clk_table = &dml->min_clk_table; + result = dml->mcg_instance.build_min_clock_table(&mcg_build_min_clk_params); + } + + if (result) { + core_init_params.project_id = in_out->options.project_id; + core_init_params.instance = &dml->core_instance; + core_init_params.minimum_clock_table = &dml->min_clk_table; + core_init_params.explicit_ip_bb = in_out->overrides.explicit_ip_bb; + core_init_params.explicit_ip_bb_size = in_out->overrides.explicit_ip_bb_size; + core_init_params.ip_caps = &in_out->ip_caps; + core_init_params.soc_bb = &in_out->soc_bb; + result = dml->core_instance.initialize(&core_init_params); + + if (core_init_params.explicit_ip_bb && core_init_params.explicit_ip_bb_size > 0) { + memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); + } + } + + if (result) + result = dml2_pmo_create(in_out->options.project_id, &dml->pmo_instance); + + if (result) { + pmo_init_params.instance = &dml->pmo_instance; + pmo_init_params.soc_bb = &dml->soc_bbox; + pmo_init_params.ip_caps = &dml->ip_caps; + pmo_init_params.min_clock_table_size = dml->min_clk_table.dram_bw_table.num_entries; + pmo_init_params.options = &dml->pmo_options; + dml->pmo_instance.initialize(&pmo_init_params); + } + + return result; +} + +static void setup_unoptimized_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) +{ + memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); + out->stage1.min_clk_index_for_latency = dml->min_clk_table.dram_bw_table.num_entries - 1; //dml->min_clk_table.clean_me_up.soc_bb.num_states - 1; +} + +static void setup_speculative_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) +{ + memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); + 
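/* speculative start: begin from the first entry of the min clock table and optimistically assume it is enough; dml2_build_mode_programming() below retries with the unoptimized (last-entry) config if mode support fails at this level */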
out->stage1.min_clk_index_for_latency = 0; +} + +bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals; + + bool result = false; + bool mcache_success = false; + + setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + + result = dml->core_instance.mode_support(&l->mode_support_params); + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (result) { + struct optimization_phase_params mcache_phase = { + .dml = dml, + .display_config = &l->base_display_config_with_meta, + .test_function = dml2_top_optimization_test_function_mcache, + .optimize_function = dml2_top_optimization_optimize_function_mcache, + .optimized_display_config = &l->optimized_display_config_with_meta, + .all_or_nothing = false, + }; + mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &mcache_phase); + } + + in_out->is_supported = mcache_success; + + return result; +} + +bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_build_mode_programming_locals *l = &dml->scratch.build_mode_programming_locals; + + bool result = false; + bool mcache_success = false; + bool uclk_pstate_success = false; + bool vmin_success = false; + bool stutter_success = false; + unsigned int i; + + memset(l, 0, sizeof(struct dml2_build_mode_programming_locals)); + memset(in_out->programming, 0, sizeof(struct dml2_display_cfg_programming)); + + memcpy(&in_out->programming->display_config, in_out->display_config, sizeof(struct dml2_display_cfg)); + + setup_speculative_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + + result = dml->core_instance.mode_support(&l->mode_support_params); + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (!result) { + setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + + result = dml->core_instance.mode_support(&l->mode_support_params); + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (!result) { + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported 
= false; + dml->core_instance.populate_informative(&l->informative_params); + + return false; + } + + /* + * Phase 1: Determine minimum clocks to satisfy latency requirements for this mode + */ + memset(&l->min_clock_for_latency_phase, 0, sizeof(struct optimization_phase_params)); + l->min_clock_for_latency_phase.dml = dml; + l->min_clock_for_latency_phase.display_config = &l->base_display_config_with_meta; + l->min_clock_for_latency_phase.init_function = dml2_top_optimization_init_function_min_clk_for_latency; + l->min_clock_for_latency_phase.test_function = dml2_top_optimization_test_function_min_clk_for_latency; + l->min_clock_for_latency_phase.optimize_function = dml2_top_optimization_optimize_function_min_clk_for_latency; + l->min_clock_for_latency_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->min_clock_for_latency_phase.all_or_nothing = false; + + dml2_top_optimization_perform_optimization_phase_1(&l->optimization_phase_locals, &l->min_clock_for_latency_phase); + + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + } + + /* + * Phase 2: Satisfy DCC mcache requirements + */ + memset(&l->mcache_phase, 0, sizeof(struct optimization_phase_params)); + l->mcache_phase.dml = dml; + l->mcache_phase.display_config = &l->base_display_config_with_meta; + l->mcache_phase.test_function = dml2_top_optimization_test_function_mcache; + l->mcache_phase.optimize_function = dml2_top_optimization_optimize_function_mcache; + l->mcache_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->mcache_phase.all_or_nothing = true; + + mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->mcache_phase); + + if (!mcache_success) { + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = false; + + dml->core_instance.populate_informative(&l->informative_params); + + in_out->programming->informative.failed_mcache_validation = true; + return false; + } + + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + + /* + * Phase 3: Optimize for Pstate + */ + memset(&l->uclk_pstate_phase, 0, sizeof(struct optimization_phase_params)); + l->uclk_pstate_phase.dml = dml; + l->uclk_pstate_phase.display_config = &l->base_display_config_with_meta; + l->uclk_pstate_phase.init_function = dml2_top_optimization_init_function_uclk_pstate; + l->uclk_pstate_phase.test_function = dml2_top_optimization_test_function_uclk_pstate; + l->uclk_pstate_phase.optimize_function = dml2_top_optimization_optimize_function_uclk_pstate; + l->uclk_pstate_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->uclk_pstate_phase.all_or_nothing = true; + + uclk_pstate_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->uclk_pstate_phase); + + if (uclk_pstate_success) { + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage3.success = true; + } + + /* + * Phase 4: Optimize for Vmin + */ + memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params)); + l->vmin_phase.dml = dml; + l->vmin_phase.display_config = &l->base_display_config_with_meta; + l->vmin_phase.init_function = dml2_top_optimization_init_function_vmin; 
+ l->vmin_phase.test_function = dml2_top_optimization_test_function_vmin; + l->vmin_phase.optimize_function = dml2_top_optimization_optimize_function_vmin; + l->vmin_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->vmin_phase.all_or_nothing = false; + + vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); + + if (vmin_success) { + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage4.success = true; + } + + /* + * Phase 5: Optimize for Stutter + */ + memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params)); + l->stutter_phase.dml = dml; + l->stutter_phase.display_config = &l->base_display_config_with_meta; + l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter; + l->stutter_phase.test_function = dml2_top_optimization_test_function_stutter; + l->stutter_phase.optimize_function = dml2_top_optimization_optimize_function_stutter; + l->stutter_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->stutter_phase.all_or_nothing = true; + + stutter_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->stutter_phase); + + if (stutter_success) { + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage4.success = true; + } + + /* + * Populate mcache programming + */ + for (i = 0; i < in_out->display_config->num_planes; i++) { + in_out->programming->plane_programming[i].mcache_allocation = l->base_display_config_with_meta.stage2.mcache_allocations[i]; + } + + /* + * Call DPMM to map all requirements to minimum clock state + */ + if (result) { + l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; + l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_mode_params.programming = in_out->programming; + l->dppm_map_mode_params.soc_bb = &dml->soc_bbox; + l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip; + result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params); + if (!result) + in_out->programming->informative.failed_dpmm = true; + } + + if (result) { + l->mode_programming_params.instance = &dml->core_instance; + l->mode_programming_params.display_cfg = &l->base_display_config_with_meta; + l->mode_programming_params.cfg_support_info = &l->base_display_config_with_meta.mode_support_result.cfg_support_info; + l->mode_programming_params.programming = in_out->programming; + + result = dml->core_instance.mode_programming(&l->mode_programming_params); + if (!result) + in_out->programming->informative.failed_mode_programming = true; + } + + if (result) { + l->dppm_map_watermarks_params.core = &dml->core_instance; + l->dppm_map_watermarks_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_watermarks_params.programming = in_out->programming; + result = dml->dpmm_instance.map_watermarks(&l->dppm_map_watermarks_params); + } + + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = result; + + dml->core_instance.populate_informative(&l->informative_params); + + return result; +} + +bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out) +{ + return 
dml2_top_mcache_build_mcache_programming(in_out); +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c new file mode 100644 index 000000000000..188e482d3396 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c @@ -0,0 +1,545 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_debug.h" + +#include "dml_top_mcache.h" +#include "lib_float_math.h" + +#include "dml2_internal_shared_types.h" + +/* +* Takes an input set of mcache boundaries and finds the appropriate setting of cache programming. +* Returns true if a valid set of programming can be made, and false otherwise. "Valid" means +* that the horizontal viewport does not span more than 2 cache slices. +* +* It optionally also can apply a constant shift to all the cache boundaries. +*/ +static const uint32_t MCACHE_ID_UNASSIGNED = 0xF; +static const uint32_t SPLIT_LOCATION_UNDEFINED = 0xFFFF; + +static bool calculate_first_second_splitting(const int *mcache_boundaries, int num_boundaries, int shift, + int pipe_h_vp_start, int pipe_h_vp_end, int *first_offset, int *second_offset) +{ + const int MAX_VP = 0xFFFFFF; + int left_cache_id; + int right_cache_id; + int range_start; + int range_end; + bool success = false; + + if (num_boundaries <= 1) { + if (first_offset && second_offset) { + *first_offset = 0; + *second_offset = -1; + } + success = true; + return success; + } else { + range_start = 0; + for (left_cache_id = 0; left_cache_id < num_boundaries; left_cache_id++) { + range_end = mcache_boundaries[left_cache_id] - shift - 1; + + if (range_start <= pipe_h_vp_start && pipe_h_vp_start <= range_end) + break; + + range_start = range_end + 1; + } + + range_end = MAX_VP; + for (right_cache_id = num_boundaries - 1; right_cache_id >= -1; right_cache_id--) { + if (right_cache_id >= 0) + range_start = mcache_boundaries[right_cache_id] - shift; + else + range_start = 0; + + if (range_start <= pipe_h_vp_end && pipe_h_vp_end <= range_end) { + break; + } + range_end = range_start - 1; + } + right_cache_id = (right_cache_id + 1) % num_boundaries; + + if (right_cache_id == left_cache_id) { + if (first_offset && second_offset) { + *first_offset = left_cache_id; + *second_offset = -1; + } + success = true; + } else if (right_cache_id == (left_cache_id + 1) % num_boundaries) { + if (first_offset && second_offset) { + *first_offset = left_cache_id; + *second_offset = right_cache_id; + } + success = true; + } + } + + return success; +} + +/* +* For a given set of pipe start/end x positions, checks to see it can support the input mcache splitting. +* It also attempts to "optimize" by finding a shift if the default 0 shift does not work. 
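*
* Editor's worked example (illustrative values, not part of the original sources):
* with mcache boundaries {256, 512, 768} and shift = 0, a pipe viewport spanning
* x = 200..500 crosses only the first boundary, so calculate_first_second_splitting()
* above reports first_offset = 0 and second_offset = 1. A viewport spanning
* x = 200..600 touches three slices and fails at shift = 0, but a shift of 64
* (boundaries become {192, 448, 704}) places it across slices 1 and 2 only, so the
* search below can find it, provided 64 is a multiple of shift_granularity.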
+*/ +static bool find_shift_for_valid_cache_id_assignment(int *mcache_boundaries, unsigned int num_boundaries, + int *pipe_vp_startx, int *pipe_vp_endx, unsigned int pipe_count, int shift_granularity, int *shift) +{ + int max_shift = 0xFFFF; + unsigned int pipe_index; + unsigned int i, slice_width; + bool success = false; + + for (i = 0; i < num_boundaries; i++) { + if (i == 0) + slice_width = mcache_boundaries[i]; + else + slice_width = mcache_boundaries[i] - mcache_boundaries[i - 1]; + + if (max_shift > (int)slice_width) { + max_shift = slice_width; + } + } + + for (*shift = 0; *shift <= max_shift; *shift += shift_granularity) { + success = true; + for (pipe_index = 0; pipe_index < pipe_count; pipe_index++) { + if (!calculate_first_second_splitting(mcache_boundaries, num_boundaries, *shift, + pipe_vp_startx[pipe_index], pipe_vp_endx[pipe_index], 0, 0)) { + success = false; + break; + } + } + if (success) + break; + } + + return success; +} + +/* +* Counts the number of elements inside input array within the given span length. +* Formally, what is the size of the largest subset of the array where the largest and smallest element +* differ no more than the span. +*/ +static unsigned int count_elements_in_span(int *array, unsigned int array_size, unsigned int span) +{ + unsigned int i; + unsigned int span_start_value; + unsigned int span_start_index; + unsigned int greatest_element_count; + + if (array_size == 0) + return 1; + + if (span == 0) + return array_size > 0 ? 1 : 0; + + span_start_value = 0; + span_start_index = 0; + greatest_element_count = 0; + + while (span_start_index < array_size) { + for (i = span_start_index; i < array_size; i++) { + if (array[i] - span_start_value > span) { + if (i - span_start_index + 1 > greatest_element_count) { + greatest_element_count = i - span_start_index + 1; + } + break; + } + } + + span_start_index++; + + if (span_start_index < array_size) { + span_start_value = array[span_start_index - 1] + 1; + } + } + + return greatest_element_count; +} + +static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes, + enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end) +{ + int i, slice_width; + const char MAX_SCL_VP_OVERLAP = 3; + bool success = false; + + switch (scaling_transform) { + case dml2_scaling_transform_centered: + case dml2_scaling_transform_aspect_ratio: + case dml2_scaling_transform_fullscreen: + slice_width = full_vp_width / num_pipes; + for (i = 0; i < num_pipes; i++) { + pipe_vp_x_start[i] = i * slice_width; + pipe_vp_x_end[i] = (i + 1) * slice_width - 1; + + if (pipe_vp_x_start[i] < MAX_SCL_VP_OVERLAP) + pipe_vp_x_start[i] = 0; + else + pipe_vp_x_start[i] -= MAX_SCL_VP_OVERLAP; + + if (pipe_vp_x_end[i] > full_vp_width - MAX_SCL_VP_OVERLAP - 1) + pipe_vp_x_end[i] = full_vp_width - 1; + else + pipe_vp_x_end[i] += MAX_SCL_VP_OVERLAP; + } + break; + case dml2_scaling_transform_explicit: + default: + success = false; + break; + } + + return success; +} + +bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params) +{ + struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; + struct dml2_top_mcache_validate_admissability_locals *l = &dml->scratch.mcache_validate_admissability_locals; + + const int MAX_PIXEL_OVERLAP = 6; + int max_per_pipe_vp_p0 = 0; + int max_per_pipe_vp_p1 = 0; + int temp, p0shift, p1shift; + unsigned int plane_index = 0; + unsigned int i; + char odm_combine_factor = 1; + char mpc_combine_factor 
= 1; + char num_dpps; + unsigned int num_boundaries; + enum dml2_scaling_transform scaling_transform; + const struct dml2_plane_parameters *plane; + const struct dml2_stream_parameters *stream; + + bool p0pass = false; + bool p1pass = false; + bool all_pass = true; + + for (plane_index = 0; plane_index < params->display_cfg->num_planes; plane_index++) { + if (!params->display_cfg->plane_descriptors[plane_index].surface.dcc.enable) + continue; + + plane = ¶ms->display_cfg->plane_descriptors[plane_index]; + stream = ¶ms->display_cfg->stream_descriptors[plane->stream_index]; + + odm_combine_factor = (char)params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; + + if (odm_combine_factor == 1) + mpc_combine_factor = (char)params->cfg_support_info->plane_support_info[plane_index].dpps_used; + else + mpc_combine_factor = 1; + + if (odm_combine_factor > 1) { + max_per_pipe_vp_p0 = plane->surface.plane0.width; + temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor); + + if (temp < max_per_pipe_vp_p0) + max_per_pipe_vp_p0 = temp; + + max_per_pipe_vp_p1 = plane->surface.plane1.width; + temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane1.h_ratio * stream->timing.h_active / odm_combine_factor); + + if (temp < max_per_pipe_vp_p1) + max_per_pipe_vp_p1 = temp; + } else { + max_per_pipe_vp_p0 = plane->surface.plane0.width / mpc_combine_factor; + max_per_pipe_vp_p1 = plane->surface.plane1.width / mpc_combine_factor; + } + + max_per_pipe_vp_p0 += 2 * MAX_PIXEL_OVERLAP; + max_per_pipe_vp_p1 += MAX_PIXEL_OVERLAP; + + p0shift = 0; + p1shift = 0; + + // The last element in the unshifted boundary array will always be the first pixel outside the + // plane, which means theres no mcache associated with it, so -1 + num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1; + if (count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, + num_boundaries, max_per_pipe_vp_p0) <= 1) { + p0pass = true; + } + num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 
0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1; + if (count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, + num_boundaries, max_per_pipe_vp_p1) <= 1) { + p1pass = true; + } + + if (!p0pass || !p1pass) { + if (odm_combine_factor > 1) { + num_dpps = odm_combine_factor; + scaling_transform = plane->composition.scaling_transform; + } else { + num_dpps = mpc_combine_factor; + scaling_transform = dml2_scaling_transform_fullscreen; + } + + if (!p0pass) { + if (plane->composition.viewport.stationary) { + calculate_h_split_for_scaling_transform(plane->surface.plane0.width, + stream->timing.h_active, num_dpps, scaling_transform, + &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); + p0pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, + params->mcache_allocations[plane_index].num_mcaches_plane0, + &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index], num_dpps, + params->mcache_allocations[plane_index].shift_granularity.p0, &p0shift); + } + } + if (!p1pass) { + if (plane->composition.viewport.stationary) { + calculate_h_split_for_scaling_transform(plane->surface.plane1.width, + stream->timing.h_active, num_dpps, scaling_transform, + &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); + p1pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, + params->mcache_allocations[plane_index].num_mcaches_plane1, + &l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index], num_dpps, + params->mcache_allocations[plane_index].shift_granularity.p1, &p1shift); + } + } + } + + if (p0pass && p1pass) { + for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane0; i++) { + params->mcache_allocations[plane_index].mcache_x_offsets_plane0[i] -= p0shift; + } + for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane1; i++) { + params->mcache_allocations[plane_index].mcache_x_offsets_plane1[i] -= p1shift; + } + } + + params->per_plane_status[plane_index] = p0pass && p1pass; + all_pass &= p0pass && p1pass; + } + + return all_pass; +} + +bool dml2_top_mcache_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params) +{ + bool success = true; + int config_index, pipe_index; + int first_offset, second_offset; + int free_per_plane_reg_index = 0; + + memset(params->per_plane_pipe_mcache_regs, 0, DML2_MAX_PLANES * DML2_MAX_DCN_PIPES * sizeof(struct dml2_hubp_pipe_mcache_regs *)); + + for (config_index = 0; config_index < params->num_configurations; config_index++) { + for (pipe_index = 0; pipe_index < params->mcache_configurations[config_index].num_pipes; pipe_index++) { + // Allocate storage for the mcache regs + params->per_plane_pipe_mcache_regs[config_index][pipe_index] = ¶ms->mcache_regs_set[free_per_plane_reg_index++]; + + // First initialize all entries to special valid MCache ID and special valid split coordinate + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location = SPLIT_LOCATION_UNDEFINED; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; + 
params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location = SPLIT_LOCATION_UNDEFINED; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location = SPLIT_LOCATION_UNDEFINED; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location = SPLIT_LOCATION_UNDEFINED; + + if (params->mcache_configurations[config_index].plane_descriptor->surface.dcc.enable) { + // P0 always enabled + if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0, + params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane0, + 0, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start + + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_width - 1, + &first_offset, &second_offset)) { + success = false; + break; + } + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[first_offset]; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[first_offset]; + + if (second_offset >= 0) { + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; + } + + // Populate P1 if enabled + if (params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1_enabled) { + if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1, + params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane1, + 0, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start + + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_width - 1, + &first_offset, &second_offset)) { + 
success = false; + break; + } + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[first_offset]; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[first_offset]; + + if (second_offset >= 0) { + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; + } + } + } + } + } + + return success; +} + +void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params) +{ + int i; + unsigned int j; + int next_unused_cache_id = 0; + + for (i = 0; i < params->num_allocations; i++) { + if (!params->allocations[i].valid) + continue; + + for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { + params->allocations[i].global_mcache_ids_plane0[j] = next_unused_cache_id++; + } + for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { + params->allocations[i].global_mcache_ids_plane1[j] = next_unused_cache_id++; + } + + // The "psuedo-last" slice is always wrapped around + params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0] = + params->allocations[i].global_mcache_ids_plane0[0]; + params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1] = + params->allocations[i].global_mcache_ids_plane1[0]; + + // If we need dedicated caches for mall requesting, then we assign them here. 
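// Editor's worked example (illustrative, not from the original sources): a DCC plane
// with num_mcaches_plane0 = 3 and num_mcaches_plane1 = 2, allocated first, gets global
// IDs {0, 1, 2} for plane 0 and {3, 4} for plane 1, with the wrapped "pseudo-last"
// entries set back to 0 and 3 respectively. If last_slice_sharing.plane0_plane1 is set
// (handled further below), the last plane 1 ID collapses from 4 to 2. Dedicated MALL
// IDs, when required, continue from the same counter starting at 5.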
+ if (params->allocations[i].requires_dedicated_mall_mcache) { + for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { + params->allocations[i].global_mcache_ids_mall_plane0[j] = next_unused_cache_id++; + } + for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { + params->allocations[i].global_mcache_ids_mall_plane1[j] = next_unused_cache_id++; + } + + // The "psuedo-last" slice is always wrapped around + params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0] = + params->allocations[i].global_mcache_ids_mall_plane0[0]; + params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1] = + params->allocations[i].global_mcache_ids_mall_plane1[0]; + } + + // If P0 and P1 are sharing caches, then it means the largest mcache IDs for p0 and p1 can be the same + // since mcache IDs are always ascending, then it means the largest mcacheID of p1 should be the + // largest mcacheID of P0 + if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && + params->allocations[i].last_slice_sharing.plane0_plane1) { + params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1] = + params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; + } + + // If we need dedicated caches handle last slice sharing + if (params->allocations[i].requires_dedicated_mall_mcache) { + if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && + params->allocations[i].last_slice_sharing.plane0_plane1) { + params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = + params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1]; + } + // If mall_comb_mcache_l is set then it means that largest mcache ID for MALL p0 can be same as regular read p0 + if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p0) { + params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1] = + params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; + } + // If mall_comb_mcache_c is set then it means that largest mcache ID for MALL p1 can be same as regular + // read p1 (which can be same as regular read p0 if plane0_plane1 is also set) + if (params->allocations[i].num_mcaches_plane1 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p1) { + params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = + params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1]; + } + } + + // If you don't need dedicated mall mcaches, the mall mcache assignments are identical to the normal requesting + if (!params->allocations[i].requires_dedicated_mall_mcache) { + memcpy(params->allocations[i].global_mcache_ids_mall_plane0, params->allocations[i].global_mcache_ids_plane0, + sizeof(params->allocations[i].global_mcache_ids_mall_plane0)); + memcpy(params->allocations[i].global_mcache_ids_mall_plane1, params->allocations[i].global_mcache_ids_plane1, + sizeof(params->allocations[i].global_mcache_ids_mall_plane1)); + } + } +} + +bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params) +{ + struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; + 
struct dml2_top_mcache_verify_mcache_size_locals *l = &dml->scratch.mcache_verify_mcache_size_locals; + + unsigned int total_mcaches_required; + unsigned int i; + bool result = false; + + if (dml->soc_bbox.num_dcc_mcaches == 0) { + return true; + } + + total_mcaches_required = 0; + l->calc_mcache_params.instance = &dml->core_instance; + for (i = 0; i < params->display_config->num_planes; i++) { + if (!params->display_config->plane_descriptors[i].surface.dcc.enable) { + memset(¶ms->mcache_allocations[i], 0, sizeof(struct dml2_mcache_surface_allocation)); + continue; + } + + l->calc_mcache_params.plane_descriptor = ¶ms->display_config->plane_descriptors[i]; + l->calc_mcache_params.mcache_allocation = ¶ms->mcache_allocations[i]; + l->calc_mcache_params.plane_index = i; + + if (!dml->core_instance.calculate_mcache_allocation(&l->calc_mcache_params)) { + result = false; + break; + } + + if (params->mcache_allocations[i].valid) { + total_mcaches_required += params->mcache_allocations[i].num_mcaches_plane0 + params->mcache_allocations[i].num_mcaches_plane1; + if (params->mcache_allocations[i].last_slice_sharing.plane0_plane1) + total_mcaches_required--; + } + } + dml2_printf("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); + + if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) { + result = false; + } else { + result = true; + } + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h new file mode 100644 index 000000000000..55a1ae4655ce --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML_TOP_MCACHE_H__ +#define __DML_TOP_MCACHE_H__ + +#include "dml2_external_lib_deps.h" +#include "dml_top_display_cfg_types.h" +#include "dml_top_types.h" +#include "dml2_internal_shared_types.h" + +bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params); + +void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params); + +bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params); + +bool dml2_top_mcache_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params); + +bool dml2_top_mcache_unit_test(void); + +#endif
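The four entry points declared above are exercised by the optimization layer earlier in this patch. Below is a minimal sketch of that call order, assuming a caller that already holds a dml2_instance, a display configuration, and the core support info; the function name and local variable names are hypothetical:

static bool sketch_mcache_flow(struct dml2_instance *dml,
		const struct dml2_display_cfg *display_cfg,
		const struct core_display_cfg_support_info *support_info,
		struct dml2_mcache_surface_allocation *allocs)
{
	struct top_mcache_calc_mcache_count_and_offsets_in_out calc = {0};
	struct top_mcache_assign_global_mcache_ids_in_out assign = {0};
	struct top_mcache_validate_admissability_in_out validate = {0};

	/* 1. Ask the core how many mcaches each DCC-enabled plane needs and where
	 *    the horizontal slice boundaries fall. */
	calc.dml2_instance = dml;
	calc.display_config = display_cfg;
	calc.mcache_allocations = allocs;
	if (!dml2_top_mcache_calc_mcache_count_and_offsets(&calc))
		return false;

	/* 2. Convert the per-plane mcache indices into chip-wide IDs, sharing the
	 *    last slice between plane 0 and plane 1 where the core allows it. */
	assign.allocations = allocs;
	assign.num_allocations = (int)display_cfg->num_planes;
	dml2_top_mcache_assign_global_mcache_ids(&assign);

	/* 3. Check that no pipe viewport straddles more than two mcache slices,
	 *    shifting boundaries if necessary. The per-pipe register values are
	 *    produced later through dml2_build_mcache_programming(). */
	validate.dml2_instance = dml;
	validate.display_cfg = display_cfg;
	validate.cfg_support_info = support_info;
	validate.mcache_allocations = allocs;
	return dml2_top_mcache_validate_admissability(&validate);
}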
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c new file mode 100644 index 000000000000..de7d8a6a2d3d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#include "dml2_debug.h" + +int dml2_printf(const char *format, ...) +{ +#ifdef _DEBUG +#ifdef _DEBUG_PRINTS + int result; + va_list args; + va_start(args, format); + + result = vprintf(format, args); + + va_end(args); + + return result; +#else + return 0; +#endif +#else + return 0; +#endif +} + +void dml2_assert(int condition) +{ + //ASSERT(condition); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h new file mode 100644 index 000000000000..f118b6911210 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML2_DEBUG_H__ +#define __DML2_DEBUG_H__ + +#ifdef _DEBUG +#define DML2_ASSERT(condition) dml2_assert(condition) +#else +#define DML2_ASSERT(condition) +#endif + +int dml2_printf(const char *format, ...); +void dml2_assert(int condition); + +#endif
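As a quick reference, a minimal sketch of how these debug hooks behave under the guards above; the surrounding function and argument names are hypothetical:

static void sketch_debug_usage(unsigned int required, unsigned int available)
{
	/* A real call in every build, but it only produces output when both _DEBUG
	 * and _DEBUG_PRINTS are defined; otherwise it simply returns 0. */
	dml2_printf("DML2: %u mcaches required, %u available\n", required, available);

	/* Expands to dml2_assert() only when _DEBUG is defined, and dml2_assert()
	 * is currently a stub with its ASSERT commented out, so it never fires. */
	DML2_ASSERT(required <= available);
}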
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h new file mode 100644 index 000000000000..d873a6895a32 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -0,0 +1,981 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + + +#ifndef __DML2_INTERNAL_SHARED_TYPES_H__ +#define __DML2_INTERNAL_SHARED_TYPES_H__ + +#include "dml2_external_lib_deps.h" +#include "dml_top_types.h" +#include "dml2_core_shared_types.h" + +/* +* DML2 MCG Types and Interfaces +*/ + +#define DML_MCG_MAX_CLK_TABLE_SIZE 20 + +struct dram_bw_to_min_clk_table_entry { + unsigned long long pre_derate_dram_bw_kbps; + unsigned long min_fclk_khz; + unsigned long min_dcfclk_khz; +}; + +struct dml2_mcg_dram_bw_to_min_clk_table { + struct dram_bw_to_min_clk_table_entry entries[DML_MCG_MAX_CLK_TABLE_SIZE]; + + unsigned int num_entries; +}; + +struct dml2_mcg_min_clock_table { + struct { + unsigned int dispclk; + unsigned int dppclk; + unsigned int dscclk; + unsigned int dtbclk; + unsigned int phyclk; + unsigned int fclk; + unsigned int dcfclk; + } max_clocks_khz; + + struct { + unsigned int dprefclk; + unsigned int xtalclk; + unsigned int pcierefclk; + unsigned int dchubrefclk; + unsigned int amclk; + } fixed_clocks_khz; + + struct dml2_mcg_dram_bw_to_min_clk_table dram_bw_table; +}; + +struct dml2_mcg_build_min_clock_table_params_in_out { + /* + * Input + */ + struct dml2_soc_bb *soc_bb; + struct { + bool perform_pseudo_build; + } clean_me_up; + + /* + * Output + */ + struct dml2_mcg_min_clock_table *min_clk_table; +}; + +struct dml2_mcg_instance { + bool (*build_min_clock_table)(struct dml2_mcg_build_min_clock_table_params_in_out *in_out); + bool (*unit_test)(void); +}; + +/* +* DML2 DPMM Types and Interfaces +*/ + +struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out { + /* + * Input + */ + struct dml2_core_ip_params *ip; + struct dml2_soc_bb *soc_bb; + struct dml2_mcg_min_clock_table *min_clk_table; + const struct display_configuation_with_meta *display_cfg; + + struct { + bool perform_pseudo_map; + struct dml2_core_internal_soc_bb *soc_bb; + } clean_me_up; + + /* + * Output + */ + struct dml2_display_cfg_programming *programming; +}; + +struct dml2_dpmm_map_watermarks_params_in_out { + /* + * Input + */ + const struct display_configuation_with_meta *display_cfg; + const struct dml2_core_instance *core; + + /* + * Output + */ + struct dml2_display_cfg_programming *programming; +}; + +struct dml2_dpmm_instance { + bool (*map_mode_to_soc_dpm)(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); + bool (*map_watermarks)(struct dml2_dpmm_map_watermarks_params_in_out *in_out); + bool (*unit_test)(void); +}; + +/* +* DML2 Core Types and Interfaces +*/ + +struct dml2_core_initialize_in_out { + enum dml2_project_id project_id; + struct dml2_core_instance *instance; + struct dml2_soc_bb *soc_bb; + struct dml2_ip_capabilities *ip_caps; + + struct dml2_mcg_min_clock_table *minimum_clock_table; + + void *explicit_ip_bb; + unsigned int explicit_ip_bb_size; + + // FIXME_STAGE2 can remove but dcn3 version still need this + struct { + struct soc_bounding_box_st *soc_bb; + struct soc_states_st *soc_states; + } legacy; +}; + +struct core_bandwidth_requirements { + int urgent_bandwidth_kbytes_per_sec; + int average_bandwidth_kbytes_per_sec; +}; + +struct core_plane_support_info { + int dpps_used; + int 
dram_change_latency_hiding_margin_in_active; + int active_latency_hiding_us; + int mall_svp_size_requirement_ways; + int nominal_vblank_pstate_latency_hiding_us; +}; + +struct core_stream_support_info { + unsigned int odms_used; + /* FAMS2 SubVP support info */ + unsigned int phantom_min_v_active; + unsigned int phantom_v_startup; + + unsigned int phantom_v_active; + unsigned int phantom_v_total; + int vblank_reserved_time_us; + int num_dsc_slices; + bool dsc_enable; +}; + +struct core_display_cfg_support_info { + bool is_supported; + + struct core_stream_support_info stream_support_info[DML2_MAX_PLANES]; + struct core_plane_support_info plane_support_info[DML2_MAX_PLANES]; + + struct { + struct dml2_core_internal_mode_support_info support_info; + } clean_me_up; +}; + +struct dml2_core_mode_support_result { + struct { + struct { + unsigned long urgent_bw_sdp_kbps; + unsigned long average_bw_sdp_kbps; + unsigned long urgent_bw_dram_kbps; + unsigned long average_bw_dram_kbps; + unsigned long dcfclk_khz; + unsigned long fclk_khz; + } svp_prefetch; + + struct { + unsigned long urgent_bw_sdp_kbps; + unsigned long average_bw_sdp_kbps; + unsigned long urgent_bw_dram_kbps; + unsigned long average_bw_dram_kbps; + unsigned long dcfclk_khz; + unsigned long fclk_khz; + } active; + + unsigned int dispclk_khz; + unsigned int dcfclk_deepsleep_khz; + unsigned int socclk_khz; + + unsigned int uclk_pstate_supported; + unsigned int fclk_pstate_supported; + } global; + + struct { + unsigned int dscclk_khz; + unsigned int dtbclk_khz; + unsigned int phyclk_khz; + } per_stream[DML2_MAX_PLANES]; + + struct { + unsigned int dppclk_khz; + unsigned int mall_svp_allocation_mblks; + unsigned int mall_full_frame_allocation_mblks; + } per_plane[DML2_MAX_PLANES]; + + struct core_display_cfg_support_info cfg_support_info; +}; + +struct dml2_optimization_stage1_state { + bool performed; + bool success; + + int min_clk_index_for_latency; +}; + +struct dml2_optimization_stage2_state { + bool performed; + bool success; + + // Whether or not each plane supports mcache + // The number of valid elements == display_cfg.num_planes + // The indexing of pstate_switch_modes matches plane_descriptors[] + bool per_plane_mcache_support[DML2_MAX_PLANES]; + struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; +}; + +#define DML2_PMO_LEGACY_PREFETCH_MAX_TWAIT_OPTIONS 8 +#define DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE 10 +#define DML2_PMO_STUTTER_CANDIDATE_LIST_SIZE 3 + +struct dml2_implicit_svp_meta { + bool valid; + unsigned long v_active; + unsigned long v_total; + unsigned long v_front_porch; +}; + +struct dml2_fams2_per_method_common_meta { + /* generic params */ + unsigned int allow_start_otg_vline; + unsigned int allow_end_otg_vline; + /* scheduling params */ + double allow_time_us; + double disallow_time_us; + double period_us; +}; + +struct dml2_fams2_meta { + bool valid; + double otg_vline_time_us; + unsigned int scheduling_delay_otg_vlines; + unsigned int vertical_interrupt_ack_delay_otg_vlines; + unsigned int allow_to_target_delay_otg_vlines; + unsigned int contention_delay_otg_vlines; + unsigned int min_allow_width_otg_vlines; + unsigned int nom_vtotal; + double nom_refresh_rate_hz; + double nom_frame_time_us; + unsigned int max_vtotal; + double min_refresh_rate_hz; + double max_frame_time_us; + unsigned int dram_clk_change_blackout_otg_vlines; + struct { + unsigned int max_vactive_det_fill_delay_otg_vlines; + struct dml2_fams2_per_method_common_meta common; + } method_vactive; + struct { + struct 
dml2_fams2_per_method_common_meta common; + } method_vblank; + struct { + unsigned int programming_delay_otg_vlines; + unsigned int df_throttle_delay_otg_vlines; + unsigned int prefetch_to_mall_delay_otg_vlines; + unsigned long phantom_vactive; + unsigned long phantom_vfp; + unsigned long phantom_vtotal; + struct dml2_fams2_per_method_common_meta common; + } method_subvp; + struct { + unsigned int programming_delay_otg_vlines; + unsigned int stretched_vtotal; + struct dml2_fams2_per_method_common_meta common; + } method_drr; +}; + +struct dml2_optimization_stage3_state { + bool performed; + bool success; + + // The pstate support mode for each plane + // The number of valid elements == display_cfg.num_planes + // The indexing of pstate_switch_modes matches plane_descriptors[] + enum dml2_uclk_pstate_support_method pstate_switch_modes[DML2_MAX_PLANES]; + + // Meta-data for implicit SVP generation, indexed by stream index + struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES]; + + // Meta-data for FAMS2 + bool fams2_required; + struct dml2_fams2_meta stream_fams2_meta[DML2_MAX_PLANES]; + + int min_clk_index_for_latency; +}; + +struct dml2_optimization_stage4_state { + bool performed; + bool success; + bool unoptimizable_streams[DML2_MAX_DCN_PIPES]; +}; + +struct dml2_optimization_stage5_state { + bool performed; + bool success; + + bool optimal_reserved_time_in_vblank_us; + bool vblank_includes_z8_optimization; +}; + +struct display_configuation_with_meta { + struct dml2_display_cfg display_config; + + struct dml2_core_mode_support_result mode_support_result; + + // Stage 1 = Min Clocks for Latency + struct dml2_optimization_stage1_state stage1; + + // Stage 2 = MCache + struct dml2_optimization_stage2_state stage2; + + // Stage 3 = UCLK PState + struct dml2_optimization_stage3_state stage3; + + // Stage 4 = Vmin + struct dml2_optimization_stage4_state stage4; + + // Stage 5 = Stutter + struct dml2_optimization_stage5_state stage5; +}; + +struct dml2_core_mode_support_in_out { + /* + * Inputs + */ + struct dml2_core_instance *instance; + const struct display_configuation_with_meta *display_cfg; + + struct dml2_mcg_min_clock_table *min_clk_table; + int min_clk_index; + + /* + * Outputs + */ + struct dml2_core_mode_support_result mode_support_result; + + struct { + // Inputs + struct dml_display_cfg_st *display_cfg; + + // Outputs + struct dml_mode_support_info_st *support_info; + unsigned int out_lowest_state_idx; + unsigned int min_fclk_khz; + unsigned int min_dcfclk_khz; + unsigned int min_dram_speed_mts; + unsigned int min_socclk_khz; + unsigned int min_dscclk_khz; + unsigned int min_dtbclk_khz; + unsigned int min_phyclk_khz; + } legacy; +}; + +struct dml2_core_mode_programming_in_out { + /* + * Inputs + */ + struct dml2_core_instance *instance; + const struct display_configuation_with_meta *display_cfg; + const struct core_display_cfg_support_info *cfg_support_info; + + /* + * Outputs (also Input the clk freq are also from programming struct) + */ + struct dml2_display_cfg_programming *programming; +}; + +struct dml2_core_populate_informative_in_out { + /* + * Inputs + */ + struct dml2_core_instance *instance; + + // If this is set, then the mode was supported, and mode programming + // was successfully run. + // Otherwise, mode programming was not run, because mode support failed. 
+ bool mode_is_supported; + + /* + * Outputs + */ + struct dml2_display_cfg_programming *programming; +}; + +struct dml2_calculate_mcache_allocation_in_out { + /* + * Inputs + */ + struct dml2_core_instance *instance; + const struct dml2_plane_parameters *plane_descriptor; + unsigned int plane_index; + + /* + * Outputs + */ + struct dml2_mcache_surface_allocation *mcache_allocation; +}; + +struct dml2_core_internal_state_inputs { + unsigned int dummy; +}; + +struct dml2_core_internal_state_intermediates { + unsigned int dummy; +}; + +struct dml2_core_mode_support_locals { + struct dml2_core_calcs_mode_support_ex mode_support_ex_params; + struct dml2_display_cfg svp_expanded_display_cfg; +}; + +struct dml2_core_mode_programming_locals { + struct dml2_core_calcs_mode_programming_ex mode_programming_ex_params; + struct dml2_display_cfg svp_expanded_display_cfg; +}; + +struct dml2_core_scratch { + struct dml2_core_mode_support_locals mode_support_locals; + struct dml2_core_mode_programming_locals mode_programming_locals; + int main_stream_index_from_svp_stream_index[DML2_MAX_PLANES]; + int svp_stream_index_from_main_stream_index[DML2_MAX_PLANES]; + int main_plane_index_to_phantom_plane_index[DML2_MAX_PLANES]; + int phantom_plane_index_to_main_plane_index[DML2_MAX_PLANES]; +}; + +struct dml2_core_instance { + struct dml2_mcg_min_clock_table *minimum_clock_table; + struct dml2_core_internal_state_inputs inputs; + struct dml2_core_internal_state_intermediates intermediates; + + struct dml2_core_scratch scratch; + + bool (*initialize)(struct dml2_core_initialize_in_out *in_out); + bool (*mode_support)(struct dml2_core_mode_support_in_out *in_out); + bool (*mode_programming)(struct dml2_core_mode_programming_in_out *in_out); + bool (*populate_informative)(struct dml2_core_populate_informative_in_out *in_out); + bool (*calculate_mcache_allocation)(struct dml2_calculate_mcache_allocation_in_out *in_out); + bool (*unit_test)(void); + + struct { + struct dml2_core_internal_display_mode_lib mode_lib; + } clean_me_up; +}; + +/* +* DML2 PMO Types and Interfaces +*/ + +struct dml2_pmo_initialize_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct dml2_soc_bb *soc_bb; + struct dml2_ip_capabilities *ip_caps; + struct dml2_pmo_options *options; + int min_clock_table_size; +}; + +struct dml2_pmo_optimize_dcc_mcache_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + const struct dml2_display_cfg *display_config; + bool *dcc_mcache_supported; + struct core_display_cfg_support_info *cfg_support_info; + + /* + * Output + */ + struct dml2_display_cfg *optimized_display_cfg; +}; + +struct dml2_pmo_init_for_vmin_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_test_for_vmin_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + const struct display_configuation_with_meta *display_config; + const struct dml2_soc_vmin_clock_limits *vmin_limits; +}; + +struct dml2_pmo_optimize_for_vmin_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; + + /* + * Output + */ + struct display_configuation_with_meta *optimized_display_config; +}; + +struct dml2_pmo_init_for_pstate_support_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_test_for_pstate_support_in_out { + /* + * Input + */ + struct 
dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_optimize_for_pstate_support_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; + bool last_candidate_failed; + + /* + * Output + */ + struct display_configuation_with_meta *optimized_display_config; +}; + +struct dml2_pmo_init_for_stutter_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_test_for_stutter_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; +}; + +struct dml2_pmo_optimize_for_stutter_in_out { + /* + * Input + */ + struct dml2_pmo_instance *instance; + struct display_configuation_with_meta *base_display_config; + bool last_candidate_failed; + + /* + * Output + */ + struct display_configuation_with_meta *optimized_display_config; +}; + +enum dml2_pmo_pstate_strategy { + dml2_pmo_pstate_strategy_na = 0, + /* hw exclusive modes */ + dml2_pmo_pstate_strategy_vactive = 1, + dml2_pmo_pstate_strategy_vblank = 2, + dml2_pmo_pstate_strategy_reserved_hw = 5, + /* fw assisted exclusive modes */ + dml2_pmo_pstate_strategy_fw_svp = 6, + dml2_pmo_pstate_strategy_reserved_fw = 10, + /* fw assisted modes requiring drr modulation */ + dml2_pmo_pstate_strategy_fw_vactive_drr = 11, + dml2_pmo_pstate_strategy_fw_vblank_drr = 12, + dml2_pmo_pstate_strategy_fw_svp_drr = 13, + dml2_pmo_pstate_strategy_reserved_fw_drr_fixed = 20, + dml2_pmo_pstate_strategy_fw_drr = 21, + dml2_pmo_pstate_strategy_reserved_fw_drr_var = 22, +}; + +#define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw - dml2_pmo_pstate_strategy_na + 1)) - 1) << dml2_pmo_pstate_strategy_na) +#define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) +#define PMO_DRR_FIXED_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_fw_drr - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) +#define PMO_DRR_VAR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_drr) +#define PMO_FW_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_svp + 1)) - 1) << dml2_pmo_pstate_strategy_fw_svp) + +#define PMO_DCN4_MAX_DISPLAYS 4 +#define PMO_DCN4_MAX_NUM_VARIANTS 2 +#define PMO_DCN4_MAX_BASE_STRATEGIES 10 + +struct dml2_pmo_scratch { + union { + struct { + double reserved_time_candidates[DML2_MAX_PLANES][DML2_PMO_LEGACY_PREFETCH_MAX_TWAIT_OPTIONS]; + int reserved_time_candidates_count[DML2_MAX_PLANES]; + int current_candidate[DML2_MAX_PLANES]; + int min_latency_index; + int max_latency_index; + int cur_latency_index; + int stream_mask; + } pmo_dcn3; + struct { + enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[DML2_MAX_PLANES][DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; + bool allow_state_increase_for_strategy[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; + int num_pstate_candidates; + int cur_pstate_candidate; + + unsigned int stream_plane_mask[DML2_MAX_PLANES]; + + unsigned int stream_vactive_capability_mask; + + int min_latency_index; + int max_latency_index; + int cur_latency_index; + + // Stores all the implicit SVP meta information indexed by stream index of the display + // 
configuration under inspection, built at optimization stage init + struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES]; + struct dml2_fams2_meta stream_fams2_meta[DML2_MAX_PLANES]; + + unsigned int optimal_vblank_reserved_time_for_stutter_us[DML2_PMO_STUTTER_CANDIDATE_LIST_SIZE]; + unsigned int num_stutter_candidates; + unsigned int cur_stutter_candidate; + bool z8_vblank_optimizable; + + /* mask of synchronized timings by stream index */ + unsigned int num_timing_groups; + unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES]; + bool group_is_drr_enabled[DML2_MAX_PLANES]; + double group_line_time_us[DML2_MAX_PLANES]; + + /* scheduling check locals */ + struct dml2_fams2_per_method_common_meta group_common_fams2_meta[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_disallow_index[DML2_MAX_PLANES]; + unsigned int sorted_group_gtl_period_index[DML2_MAX_PLANES]; + double group_phase_offset[DML2_MAX_PLANES]; + } pmo_dcn4; + }; +}; + +struct dml2_pmo_init_data { + union { + struct { + /* populated once during initialization */ + enum dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 1][PMO_DCN4_MAX_DISPLAYS]; + enum dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2 * 2][PMO_DCN4_MAX_DISPLAYS]; + enum dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 2][PMO_DCN4_MAX_DISPLAYS]; + enum dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 24 * 2][PMO_DCN4_MAX_DISPLAYS]; + unsigned int num_expanded_strategies_per_list[PMO_DCN4_MAX_DISPLAYS]; + } pmo_dcn4; + }; +}; + +struct dml2_pmo_instance { + struct dml2_soc_bb *soc_bb; + struct dml2_ip_capabilities *ip_caps; + + struct dml2_pmo_options *options; + + int disp_clk_vmin_threshold; + int mpc_combine_limit; + int odm_combine_limit; + int min_clock_table_size; + + union { + struct { + struct { + int prefetch_end_to_mall_start_us; + int fw_processing_delay_us; + int refresh_rate_limit_min; + int refresh_rate_limit_max; + } subvp; + } v1; + struct { + struct { + int refresh_rate_limit_min; + int refresh_rate_limit_max; + } subvp; + struct { + int refresh_rate_limit_min; + int refresh_rate_limit_max; + } drr; + } v2; + } fams_params; + + bool (*initialize)(struct dml2_pmo_initialize_in_out *in_out); + bool (*optimize_dcc_mcache)(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); + + bool (*init_for_vmin)(struct dml2_pmo_init_for_vmin_in_out *in_out); + bool (*test_for_vmin)(struct dml2_pmo_test_for_vmin_in_out *in_out); + bool (*optimize_for_vmin)(struct dml2_pmo_optimize_for_vmin_in_out *in_out); + + bool (*init_for_uclk_pstate)(struct dml2_pmo_init_for_pstate_support_in_out *in_out); + bool (*test_for_uclk_pstate)(struct dml2_pmo_test_for_pstate_support_in_out *in_out); + bool (*optimize_for_uclk_pstate)(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); + + bool (*init_for_stutter)(struct dml2_pmo_init_for_stutter_in_out *in_out); + bool (*test_for_stutter)(struct dml2_pmo_test_for_stutter_in_out *in_out); + bool (*optimize_for_stutter)(struct dml2_pmo_optimize_for_stutter_in_out *in_out); + + bool (*unit_test)(void); + + struct dml2_pmo_init_data init_data; + struct dml2_pmo_scratch scratch; +}; + +/* +* DML2 MCache Types +*/ + +struct top_mcache_validate_admissability_in_out { + struct dml2_instance *dml2_instance; + + const struct dml2_display_cfg *display_cfg; + const struct core_display_cfg_support_info *cfg_support_info; + struct 
dml2_mcache_surface_allocation *mcache_allocations; + + bool per_plane_status[DML2_MAX_PLANES]; + + struct { + const struct dml_mode_support_info_st *mode_support_info; + } legacy; +}; + +struct top_mcache_assign_ids_in_out { + /* + * Input + */ + const struct dml2_mcache_surface_allocation *mcache_allocations; + int plane_count; + + int per_pipe_viewport_x_start[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; + int per_pipe_viewport_x_end[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; + int pipe_count_per_plane[DML2_MAX_PLANES]; + + struct dml2_display_mcache_regs *current_mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; //One set per pipe/hubp + + /* + * Output + */ + struct dml2_display_mcache_regs mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; //One set per pipe/hubp + struct dml2_build_mcache_programming_in_out *mcache_programming; +}; + +struct top_mcache_calc_mcache_count_and_offsets_in_out { + /* + * Inputs + */ + struct dml2_instance *dml2_instance; + const struct dml2_display_cfg *display_config; + + /* + * Outputs + */ + struct dml2_mcache_surface_allocation *mcache_allocations; +}; + +struct top_mcache_assign_global_mcache_ids_in_out { + /* + * Inputs/Outputs + */ + struct dml2_mcache_surface_allocation *allocations; + int num_allocations; +}; + +/* +* DML2 Top Types +*/ + +struct dml2_initialize_instance_locals { + int dummy; +}; + +struct dml2_optimization_init_function_locals { + union { + struct { + struct dml2_pmo_init_for_pstate_support_in_out init_params; + } uclk_pstate; + struct { + struct dml2_pmo_init_for_stutter_in_out stutter_params; + } stutter; + struct { + struct dml2_pmo_init_for_vmin_in_out init_params; + } vmin; + }; +}; + +struct dml2_optimization_test_function_locals { + union { + struct { + struct top_mcache_calc_mcache_count_and_offsets_in_out calc_mcache_count_params; + struct top_mcache_assign_global_mcache_ids_in_out assign_global_mcache_ids_params; + struct top_mcache_validate_admissability_in_out validate_admissibility_params; + } test_mcache; + struct { + struct dml2_pmo_test_for_vmin_in_out pmo_test_vmin_params; + } test_vmin; + struct { + struct dml2_pmo_test_for_pstate_support_in_out test_params; + } uclk_pstate; + struct { + struct dml2_pmo_test_for_stutter_in_out stutter_params; + } stutter; + }; +}; + +struct dml2_optimization_optimize_function_locals { + union { + struct { + struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params; + } optimize_mcache; + struct { + struct dml2_pmo_optimize_for_vmin_in_out pmo_optimize_vmin_params; + } optimize_vmin; + struct { + struct dml2_pmo_optimize_for_pstate_support_in_out optimize_params; + } uclk_pstate; + struct { + struct dml2_pmo_optimize_for_stutter_in_out stutter_params; + } stutter; + }; +}; + +struct dml2_optimization_phase_locals { + struct display_configuation_with_meta cur_candidate_display_cfg; + struct display_configuation_with_meta next_candidate_display_cfg; + struct dml2_core_mode_support_in_out mode_support_params; + struct dml2_optimization_init_function_locals init_function_locals; + struct dml2_optimization_test_function_locals test_function_locals; + struct dml2_optimization_optimize_function_locals optimize_function_locals; +}; + +struct dml2_check_mode_supported_locals { + struct dml2_display_cfg display_cfg_working_copy; + struct dml2_core_mode_support_in_out mode_support_params; + struct dml2_optimization_phase_locals optimization_phase_locals; + struct display_configuation_with_meta base_display_config_with_meta; + struct display_configuation_with_meta 
optimized_display_config_with_meta; +}; + +struct optimization_init_function_params { + struct dml2_optimization_init_function_locals *locals; + struct dml2_instance *dml; + struct display_configuation_with_meta *display_config; +}; + +struct optimization_test_function_params { + struct dml2_optimization_test_function_locals *locals; + struct dml2_instance *dml; + struct display_configuation_with_meta *display_config; +}; + +struct optimization_optimize_function_params { + bool last_candidate_supported; + struct dml2_optimization_optimize_function_locals *locals; + struct dml2_instance *dml; + struct display_configuation_with_meta *display_config; + struct display_configuation_with_meta *optimized_display_config; +}; + +struct optimization_phase_params { + struct dml2_instance *dml; + const struct display_configuation_with_meta *display_config; // Initial Display Configuration + bool (*init_function)(const struct optimization_init_function_params *params); // Test function to determine optimization is complete + bool (*test_function)(const struct optimization_test_function_params *params); // Test function to determine optimization is complete + bool (*optimize_function)(const struct optimization_optimize_function_params *params); // Function which produces a more optimized display configuration + struct display_configuation_with_meta *optimized_display_config; // The optimized display configuration + + bool all_or_nothing; +}; + +struct dml2_build_mode_programming_locals { + struct dml2_core_mode_support_in_out mode_support_params; + struct dml2_core_mode_programming_in_out mode_programming_params; + struct dml2_core_populate_informative_in_out informative_params; + struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params; + struct display_configuation_with_meta base_display_config_with_meta; + struct display_configuation_with_meta optimized_display_config_with_meta; + struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out dppm_map_mode_params; + struct dml2_dpmm_map_watermarks_params_in_out dppm_map_watermarks_params; + struct dml2_optimization_phase_locals optimization_phase_locals; + struct optimization_phase_params min_clock_for_latency_phase; + struct optimization_phase_params mcache_phase; + struct optimization_phase_params uclk_pstate_phase; + struct optimization_phase_params vmin_phase; + struct optimization_phase_params stutter_phase; +}; + +struct dml2_legacy_core_build_mode_programming_wrapper_locals { + struct dml2_core_mode_support_in_out mode_support_params; + struct dml2_core_mode_programming_in_out mode_programming_params; + struct dml2_core_populate_informative_in_out informative_params; + struct top_mcache_calc_mcache_count_and_offsets_in_out calc_mcache_count_params; + struct top_mcache_validate_admissability_in_out validate_admissibility_params; + struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; + struct top_mcache_assign_global_mcache_ids_in_out assign_global_mcache_ids_params; + struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params; + struct dml2_display_cfg optimized_display_cfg; + struct core_display_cfg_support_info core_support_info; +}; + +struct dml2_top_mcache_verify_mcache_size_locals { + struct dml2_calculate_mcache_allocation_in_out calc_mcache_params; +}; + +struct dml2_top_mcache_validate_admissability_locals { + struct { + int pipe_vp_startx[DML2_MAX_DCN_PIPES]; + int pipe_vp_endx[DML2_MAX_DCN_PIPES]; + } plane0; + struct { + int pipe_vp_startx[DML2_MAX_DCN_PIPES]; + int pipe_vp_endx[DML2_MAX_DCN_PIPES]; + } 
plane1; +}; + +struct dml2_top_display_cfg_support_info { + const struct dml2_display_cfg *display_config; + struct core_display_cfg_support_info core_info; + enum dml2_pstate_support_method per_plane_pstate_method[DML2_MAX_PLANES]; +}; + +struct dml2_instance { + enum dml2_project_id project_id; + + struct dml2_core_instance core_instance; + struct dml2_mcg_instance mcg_instance; + struct dml2_dpmm_instance dpmm_instance; + struct dml2_pmo_instance pmo_instance; + + struct dml2_soc_bb soc_bbox; + struct dml2_ip_capabilities ip_caps; + + struct dml2_mcg_min_clock_table min_clk_table; + + struct dml2_pmo_options pmo_options; + + struct { + struct dml2_initialize_instance_locals initialize_instance_locals; + struct dml2_top_mcache_verify_mcache_size_locals mcache_verify_mcache_size_locals; + struct dml2_top_mcache_validate_admissability_locals mcache_validate_admissability_locals; + struct dml2_check_mode_supported_locals check_mode_supported_locals; + struct dml2_build_mode_programming_locals build_mode_programming_locals; + } scratch; + + struct { + struct { + struct dml2_legacy_core_build_mode_programming_wrapper_locals legacy_core_build_mode_programming_wrapper_locals; + } scratch; + } legacy; +}; +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c new file mode 100644 index 000000000000..eee64d8e1013 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c @@ -0,0 +1,432 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dm_services.h" +#include "core_types.h" +#include "reg_helper.h" +#include "dcn401/dcn401_dpp.h" +#include "basics/conversion.h" +#include "dcn30/dcn30_cm_common.h" +#include "dcn32/dcn32_dpp.h" + +#define REG(reg)\ + dpp->tf_regs->reg + +#define CTX \ + dpp->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + dpp->tf_shift->field_name, dpp->tf_mask->field_name + +void dpp401_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s) +{ + struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base); + + REG_GET(DPP_CONTROL, + DPP_CLOCK_ENABLE, &s->is_enabled); + + // TODO: Implement for DCN4 +} + +void dpp401_dpp_setup( + struct dpp *dpp_base, + enum surface_pixel_format format, + enum expansion_mode mode, + struct dc_csc_transform input_csc_color_matrix, + enum dc_color_space input_color_space, + struct cnv_alpha_2bit_lut *alpha_2bit_lut) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + uint32_t pixel_format = 0; + uint32_t alpha_en = 1; + enum dc_color_space color_space = COLOR_SPACE_SRGB; + enum dcn10_input_csc_select select = INPUT_CSC_SELECT_BYPASS; + uint32_t is_2bit = 0; + uint32_t alpha_plane_enable = 0; + uint32_t dealpha_en = 0, dealpha_ablnd_en = 0; + uint32_t realpha_en = 0, realpha_ablnd_en = 0; + uint32_t program_prealpha_dealpha = 0; + struct out_csc_color_matrix tbl_entry; + int i; + + REG_SET_2(FORMAT_CONTROL, 0, + CNVC_BYPASS, 0, + FORMAT_EXPANSION_MODE, mode); + + REG_UPDATE(FORMAT_CONTROL, FORMAT_CNV16, 0); + REG_UPDATE(FORMAT_CONTROL, CNVC_BYPASS_MSB_ALIGN, 0); + REG_UPDATE(FORMAT_CONTROL, CLAMP_POSITIVE, 0); + REG_UPDATE(FORMAT_CONTROL, CLAMP_POSITIVE_C, 0); + + REG_UPDATE(FORMAT_CONTROL, FORMAT_CROSSBAR_R, 0); + REG_UPDATE(FORMAT_CONTROL, FORMAT_CROSSBAR_G, 1); + REG_UPDATE(FORMAT_CONTROL, FORMAT_CROSSBAR_B, 2); + + switch (format) { + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + pixel_format = 1; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + pixel_format = 3; + alpha_en = 0; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: + pixel_format = 8; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010: + pixel_format = 10; + is_2bit = 1; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + pixel_format = 65; + color_space = COLOR_SPACE_YCBCR709; + select = INPUT_CSC_SELECT_ICSC; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + pixel_format = 64; + color_space = COLOR_SPACE_YCBCR709; + select = INPUT_CSC_SELECT_ICSC; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + pixel_format = 67; + color_space = COLOR_SPACE_YCBCR709; + select = INPUT_CSC_SELECT_ICSC; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + pixel_format = 66; + color_space = COLOR_SPACE_YCBCR709; + select = INPUT_CSC_SELECT_ICSC; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: + pixel_format = 26; /* ARGB16161616_UNORM */ + break; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + pixel_format = 24; + break; + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + pixel_format = 25; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888: + pixel_format = 12; + color_space = COLOR_SPACE_YCBCR709; + select = INPUT_CSC_SELECT_ICSC; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX: + pixel_format = 112; + alpha_en = 0; + break; + case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FIX: + pixel_format = 113; + alpha_en = 0; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010: + pixel_format = 114; 
+ color_space = COLOR_SPACE_YCBCR709; + select = INPUT_CSC_SELECT_ICSC; + is_2bit = 1; + break; + case SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102: + pixel_format = 115; + color_space = COLOR_SPACE_YCBCR709; + select = INPUT_CSC_SELECT_ICSC; + is_2bit = 1; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE: + pixel_format = 116; + alpha_plane_enable = 0; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + pixel_format = 116; + alpha_plane_enable = 1; + break; + case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT: + pixel_format = 118; + alpha_en = 0; + break; + case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FLOAT: + pixel_format = 119; + alpha_en = 0; + break; + default: + break; + } + + /* Set default color space based on format if none is given. */ + color_space = input_color_space ? input_color_space : color_space; + + if (is_2bit == 1 && alpha_2bit_lut != NULL) { + REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0); + REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, alpha_2bit_lut->lut1); + REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT2, alpha_2bit_lut->lut2); + REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT3, alpha_2bit_lut->lut3); + } + + REG_SET_2(CNVC_SURFACE_PIXEL_FORMAT, 0, + CNVC_SURFACE_PIXEL_FORMAT, pixel_format, + CNVC_ALPHA_PLANE_ENABLE, alpha_plane_enable); + REG_UPDATE(FORMAT_CONTROL, FORMAT_CONTROL__ALPHA_EN, alpha_en); + + if (program_prealpha_dealpha) { + dealpha_en = 1; + realpha_en = 1; + } + REG_SET_2(PRE_DEALPHA, 0, + PRE_DEALPHA_EN, dealpha_en, + PRE_DEALPHA_ABLND_EN, dealpha_ablnd_en); + REG_SET_2(PRE_REALPHA, 0, + PRE_REALPHA_EN, realpha_en, + PRE_REALPHA_ABLND_EN, realpha_ablnd_en); + + /* If input adjustment exists, program the ICSC with those values. */ + if (input_csc_color_matrix.enable_adjustment == true) { + for (i = 0; i < 12; i++) + tbl_entry.regval[i] = input_csc_color_matrix.matrix[i]; + + tbl_entry.color_space = input_color_space; + + if (color_space >= COLOR_SPACE_YCBCR601) + select = INPUT_CSC_SELECT_ICSC; + else + select = INPUT_CSC_SELECT_BYPASS; + + dpp3_program_post_csc(dpp_base, color_space, select, + &tbl_entry); + } else { + dpp3_program_post_csc(dpp_base, color_space, select, NULL); + } +} + + +static struct dpp_funcs dcn401_dpp_funcs = { + .dpp_program_gamcor_lut = dpp3_program_gamcor_lut, + .dpp_read_state = dpp401_read_state, + .dpp_reset = dpp_reset, + .dpp_set_scaler = dpp401_dscl_set_scaler_manual_scale, + .dpp_get_optimal_number_of_taps = dpp3_get_optimal_number_of_taps, + .dpp_set_gamut_remap = NULL, + .dpp_set_csc_adjustment = NULL, + .dpp_set_csc_default = NULL, + .dpp_program_regamma_pwl = NULL, + .dpp_set_pre_degam = dpp3_set_pre_degam, + .dpp_program_input_lut = NULL, + .dpp_full_bypass = dpp401_full_bypass, + .dpp_setup = dpp401_dpp_setup, + .dpp_program_degamma_pwl = NULL, + .dpp_program_cm_dealpha = dpp3_program_cm_dealpha, + .dpp_program_cm_bias = dpp3_program_cm_bias, + + .dpp_program_blnd_lut = NULL, // BLNDGAM is removed completely in DCN3.2 DPP + .dpp_program_shaper_lut = NULL, // CM SHAPER block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) + .dpp_program_3dlut = NULL, // CM 3DLUT block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) + + .dpp_program_bias_and_scale = NULL, + .dpp_cnv_set_alpha_keyer = dpp2_cnv_set_alpha_keyer, + .set_cursor_attributes = dpp401_set_cursor_attributes, + .set_cursor_position = dpp401_set_cursor_position, + .set_optional_cursor_attributes = dpp401_set_optional_cursor_attributes, + .dpp_dppclk_control = dpp1_dppclk_control, + 
.dpp_set_hdr_multiplier = dpp3_set_hdr_multiplier, + .set_cursor_matrix = dpp401_set_cursor_matrix, +}; + + +static struct dpp_caps dcn401_dpp_cap = { + .dscl_data_proc_format = DSCL_DATA_PRCESSING_FLOAT_FORMAT, + .max_lb_partitions = 63, + .dscl_calc_lb_num_partitions = dscl401_calc_lb_num_partitions, +}; + +bool dpp401_construct( + struct dcn401_dpp *dpp, + struct dc_context *ctx, + uint32_t inst, + const struct dcn401_dpp_registers *tf_regs, + const struct dcn401_dpp_shift *tf_shift, + const struct dcn401_dpp_mask *tf_mask) +{ + dpp->base.ctx = ctx; + + dpp->base.inst = inst; + dpp->base.funcs = &dcn401_dpp_funcs; + dpp->base.caps = &dcn401_dpp_cap; + + dpp->tf_regs = tf_regs; + dpp->tf_shift = tf_shift; + dpp->tf_mask = tf_mask; + + return true; +} +/* Compute the maximum number of lines that we can fit in the line buffer */ + +void dscl401_calc_lb_num_partitions( + const struct scaler_data *scl_data, + enum lb_memory_config lb_config, + int *num_part_y, + int *num_part_c) +{ + int memory_line_size_y, memory_line_size_c, memory_line_size_a, + lb_memory_size, lb_memory_size_c, lb_memory_size_a, num_partitions_a; + + int line_size = scl_data->viewport.width < scl_data->recout.width ? + scl_data->viewport.width : scl_data->recout.width; + int line_size_c = scl_data->viewport_c.width < scl_data->recout.width ? + scl_data->viewport_c.width : scl_data->recout.width; + + if (line_size == 0) + line_size = 1; + + if (line_size_c == 0) + line_size_c = 1; + + memory_line_size_y = (line_size + 5) / 6; /* +5 to ceil */ + memory_line_size_c = (line_size_c + 5) / 6; /* +5 to ceil */ + memory_line_size_a = (line_size + 5) / 6; /* +5 to ceil */ + + if (lb_config == LB_MEMORY_CONFIG_1) { + lb_memory_size = 970; + lb_memory_size_c = 970; + lb_memory_size_a = 970; + } else if (lb_config == LB_MEMORY_CONFIG_2) { + lb_memory_size = 1290; + lb_memory_size_c = 1290; + lb_memory_size_a = 1290; + } else if (lb_config == LB_MEMORY_CONFIG_3) { + if (scl_data->viewport.width == scl_data->h_active && + scl_data->viewport.height == scl_data->v_active) { + /* 420 mode: luma using all 3 mem from Y, plus 3rd mem from Cr and Cb */ + /* use increased LB size for calculation only if Scaler not enabled */ + lb_memory_size = 970 + 1290 + 1170 + 1170 + 1170; + lb_memory_size_c = 970 + 1290; + lb_memory_size_a = 970 + 1290 + 1170; + } else { + /* 420 mode: luma using all 3 mem from Y, plus 3rd mem from Cr and Cb */ + lb_memory_size = 970 + 1290 + 484 + 484 + 484; + lb_memory_size_c = 970 + 1290; + lb_memory_size_a = 970 + 1290 + 484; + } + } else { + if (scl_data->viewport.width == scl_data->h_active && + scl_data->viewport.height == scl_data->v_active) { + /* use increased LB size for calculation only if Scaler not enabled */ + lb_memory_size = 970 + 1290 + 1170; + lb_memory_size_c = 970 + 1290 + 1170; + lb_memory_size_a = 970 + 1290 + 1170; + } else { + lb_memory_size = 970 + 1290 + 484; + lb_memory_size_c = 970 + 1290 + 484; + lb_memory_size_a = 970 + 1290 + 484; + } + } + *num_part_y = lb_memory_size / memory_line_size_y; + *num_part_c = lb_memory_size_c / memory_line_size_c; + num_partitions_a = lb_memory_size_a / memory_line_size_a; + + if (scl_data->lb_params.alpha_en + && (num_partitions_a < *num_part_y)) + *num_part_y = num_partitions_a; + + if (*num_part_y > 64) + *num_part_y = 64; + if (*num_part_c > 64) + *num_part_c = 64; +} + +/* Compute the maximum number of lines that we can fit in the line buffer */ +void dscl401_spl_calc_lb_num_partitions( + bool alpha_en, + const struct spl_scaler_data *scl_data, + enum 
lb_memory_config lb_config, + int *num_part_y, + int *num_part_c) +{ + int memory_line_size_y, memory_line_size_c, memory_line_size_a, + lb_memory_size, lb_memory_size_c, lb_memory_size_a, num_partitions_a; + + int line_size = scl_data->viewport.width < scl_data->recout.width ? + scl_data->viewport.width : scl_data->recout.width; + int line_size_c = scl_data->viewport_c.width < scl_data->recout.width ? + scl_data->viewport_c.width : scl_data->recout.width; + + if (line_size == 0) + line_size = 1; + + if (line_size_c == 0) + line_size_c = 1; + + memory_line_size_y = (line_size + 5) / 6; /* +5 to ceil */ + memory_line_size_c = (line_size_c + 5) / 6; /* +5 to ceil */ + memory_line_size_a = (line_size + 5) / 6; /* +5 to ceil */ + + if (lb_config == LB_MEMORY_CONFIG_1) { + lb_memory_size = 970; + lb_memory_size_c = 970; + lb_memory_size_a = 970; + } else if (lb_config == LB_MEMORY_CONFIG_2) { + lb_memory_size = 1290; + lb_memory_size_c = 1290; + lb_memory_size_a = 1290; + } else if (lb_config == LB_MEMORY_CONFIG_3) { + if (scl_data->viewport.width == scl_data->h_active && + scl_data->viewport.height == scl_data->v_active) { + /* 420 mode: luma using all 3 mem from Y, plus 3rd mem from Cr and Cb */ + /* use increased LB size for calculation only if Scaler not enabled */ + lb_memory_size = 970 + 1290 + 1170 + 1170 + 1170; + lb_memory_size_c = 970 + 1290; + lb_memory_size_a = 970 + 1290 + 1170; + } else { + /* 420 mode: luma using all 3 mem from Y, plus 3rd mem from Cr and Cb */ + lb_memory_size = 970 + 1290 + 484 + 484 + 484; + lb_memory_size_c = 970 + 1290; + lb_memory_size_a = 970 + 1290 + 484; + } + } else { + if (scl_data->viewport.width == scl_data->h_active && + scl_data->viewport.height == scl_data->v_active) { + /* use increased LB size for calculation only if Scaler not enabled */ + lb_memory_size = 970 + 1290 + 1170; + lb_memory_size_c = 970 + 1290 + 1170; + lb_memory_size_a = 970 + 1290 + 1170; + } else { + lb_memory_size = 970 + 1290 + 484; + lb_memory_size_c = 970 + 1290 + 484; + lb_memory_size_a = 970 + 1290 + 484; + } + } + *num_part_y = lb_memory_size / memory_line_size_y; + *num_part_c = lb_memory_size_c / memory_line_size_c; + num_partitions_a = lb_memory_size_a / memory_line_size_a; + + if (alpha_en && (num_partitions_a < *num_part_y)) + *num_part_y = num_partitions_a; + + if (*num_part_y > 64) + *num_part_y = 64; + if (*num_part_c > 64) + *num_part_c = 64; +} diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h new file mode 100644 index 000000000000..7ab657ad3a20 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h @@ -0,0 +1,725 @@ +/* Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN401_DPP_H__ +#define __DCN401_DPP_H__ + +#include "dcn20/dcn20_dpp.h" +#include "dcn30/dcn30_dpp.h" +#include "dcn32/dcn32_dpp.h" + +#define TO_DCN401_DPP(dpp)\ + container_of(dpp, struct dcn401_dpp, base) + +#define DPP_REG_LIST_SH_MASK_DCN401_COMMON(mask_sh)\ + TF_SF(CM0_CM_MEM_PWR_STATUS, GAMCOR_MEM_PWR_STATE, mask_sh),\ + TF_SF(CM0_CM_DEALPHA, CM_DEALPHA_EN, mask_sh),\ + TF_SF(CM0_CM_DEALPHA, CM_DEALPHA_ABLND, mask_sh),\ + TF_SF(CM0_CM_BIAS_CR_R, CM_BIAS_CR_R, mask_sh),\ + TF_SF(CM0_CM_BIAS_Y_G_CB_B, CM_BIAS_Y_G, mask_sh),\ + TF_SF(CM0_CM_BIAS_Y_G_CB_B, CM_BIAS_CB_B, mask_sh),\ + TF_SF(CM0_CM_MEM_PWR_CTRL, GAMCOR_MEM_PWR_DIS, mask_sh),\ + TF_SF(CM0_CM_MEM_PWR_CTRL, GAMCOR_MEM_PWR_FORCE, mask_sh),\ + TF_SF(CNVC_CFG0_PRE_DEGAM, PRE_DEGAM_MODE, mask_sh),\ + TF_SF(CNVC_CFG0_PRE_DEGAM, PRE_DEGAM_SELECT, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_CONTROL, CM_GAMCOR_MODE, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_CONTROL, CM_GAMCOR_PWL_DISABLE, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_CONTROL, CM_GAMCOR_MODE_CURRENT, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT_CURRENT, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_LUT_INDEX, CM_GAMCOR_LUT_INDEX, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_LUT_DATA, CM_GAMCOR_LUT_DATA, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_WRITE_COLOR_MASK, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_READ_COLOR_SEL, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_READ_DBG, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_HOST_SEL, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_CONFIG_MODE, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_START_CNTL_B, CM_GAMCOR_RAMA_EXP_REGION_START_B, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_START_CNTL_B, CM_GAMCOR_RAMA_EXP_REGION_START_SEGMENT_B, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_START_SLOPE_CNTL_B, CM_GAMCOR_RAMA_EXP_REGION_START_SLOPE_B, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_START_BASE_CNTL_B, CM_GAMCOR_RAMA_EXP_REGION_START_BASE_B, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_END_CNTL1_B, CM_GAMCOR_RAMA_EXP_REGION_END_BASE_B, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_END_CNTL2_B, CM_GAMCOR_RAMA_EXP_REGION_END_B, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_END_CNTL2_B, CM_GAMCOR_RAMA_EXP_REGION_END_SLOPE_B, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_OFFSET_B, CM_GAMCOR_RAMA_OFFSET_B, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_REGION_0_1, CM_GAMCOR_RAMA_EXP_REGION0_LUT_OFFSET, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_REGION_0_1, CM_GAMCOR_RAMA_EXP_REGION0_NUM_SEGMENTS, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_REGION_0_1, CM_GAMCOR_RAMA_EXP_REGION1_LUT_OFFSET, mask_sh),\ + TF_SF(CM0_CM_GAMCOR_RAMA_REGION_0_1, CM_GAMCOR_RAMA_EXP_REGION1_NUM_SEGMENTS, mask_sh),\ + TF_SF(DSCL0_DSCL_EXT_OVERSCAN_LEFT_RIGHT, EXT_OVERSCAN_LEFT, mask_sh),\ + TF_SF(DSCL0_DSCL_EXT_OVERSCAN_LEFT_RIGHT, EXT_OVERSCAN_RIGHT, mask_sh),\ + TF_SF(DSCL0_DSCL_EXT_OVERSCAN_TOP_BOTTOM, EXT_OVERSCAN_BOTTOM, mask_sh),\ + TF_SF(DSCL0_DSCL_EXT_OVERSCAN_TOP_BOTTOM, EXT_OVERSCAN_TOP, mask_sh),\ + TF_SF(DSCL0_OTG_H_BLANK, OTG_H_BLANK_START, mask_sh),\ + TF_SF(DSCL0_OTG_H_BLANK, OTG_H_BLANK_END, mask_sh),\ + TF_SF(DSCL0_OTG_V_BLANK, OTG_V_BLANK_START, mask_sh),\ + TF_SF(DSCL0_OTG_V_BLANK, OTG_V_BLANK_END, 
mask_sh),\ + TF_SF(DSCL0_LB_DATA_FORMAT, INTERLEAVE_EN, mask_sh),\ + TF2_SF(DSCL0, LB_DATA_FORMAT__ALPHA_EN, mask_sh),\ + TF_SF(DSCL0_LB_MEMORY_CTRL, MEMORY_CONFIG, mask_sh),\ + TF_SF(DSCL0_LB_MEMORY_CTRL, LB_MAX_PARTITIONS, mask_sh),\ + TF_SF(DSCL0_DSCL_AUTOCAL, AUTOCAL_MODE, mask_sh),\ + TF_SF(DSCL0_DSCL_AUTOCAL, AUTOCAL_NUM_PIPE, mask_sh),\ + TF_SF(DSCL0_DSCL_CONTROL, SCL_BOUNDARY_MODE, mask_sh),\ + TF_SF(DSCL0_DSCL_AUTOCAL, AUTOCAL_PIPE_ID, mask_sh),\ + TF_SF(DSCL0_SCL_TAP_CONTROL, SCL_V_NUM_TAPS, mask_sh),\ + TF_SF(DSCL0_SCL_TAP_CONTROL, SCL_H_NUM_TAPS, mask_sh),\ + TF_SF(DSCL0_SCL_TAP_CONTROL, SCL_V_NUM_TAPS_C, mask_sh),\ + TF_SF(DSCL0_SCL_TAP_CONTROL, SCL_H_NUM_TAPS_C, mask_sh),\ + TF_SF(DSCL0_SCL_COEF_RAM_TAP_SELECT, SCL_COEF_RAM_TAP_PAIR_IDX, mask_sh),\ + TF_SF(DSCL0_SCL_COEF_RAM_TAP_SELECT, SCL_COEF_RAM_PHASE, mask_sh),\ + TF_SF(DSCL0_SCL_COEF_RAM_TAP_SELECT, SCL_COEF_RAM_FILTER_TYPE, mask_sh),\ + TF_SF(DSCL0_SCL_COEF_RAM_TAP_DATA, SCL_COEF_RAM_EVEN_TAP_COEF, mask_sh),\ + TF_SF(DSCL0_SCL_COEF_RAM_TAP_DATA, SCL_COEF_RAM_EVEN_TAP_COEF_EN, mask_sh),\ + TF_SF(DSCL0_SCL_COEF_RAM_TAP_DATA, SCL_COEF_RAM_ODD_TAP_COEF, mask_sh),\ + TF_SF(DSCL0_SCL_COEF_RAM_TAP_DATA, SCL_COEF_RAM_ODD_TAP_COEF_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_H_2TAP_HARDCODE_COEF_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_H_2TAP_SHARP_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_H_2TAP_SHARP_FACTOR, mask_sh),\ + TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_V_2TAP_HARDCODE_COEF_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_V_2TAP_SHARP_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_V_2TAP_SHARP_FACTOR, mask_sh),\ + TF_SF(DSCL0_SCL_MODE, SCL_COEF_RAM_SELECT, mask_sh),\ + TF_SF(DSCL0_SCL_MODE, DSCL_MODE, mask_sh),\ + TF_SF(DSCL0_RECOUT_START, RECOUT_START_X, mask_sh),\ + TF_SF(DSCL0_RECOUT_START, RECOUT_START_Y, mask_sh),\ + TF_SF(DSCL0_RECOUT_SIZE, RECOUT_WIDTH, mask_sh),\ + TF_SF(DSCL0_RECOUT_SIZE, RECOUT_HEIGHT, mask_sh),\ + TF_SF(DSCL0_MPC_SIZE, MPC_WIDTH, mask_sh),\ + TF_SF(DSCL0_MPC_SIZE, MPC_HEIGHT, mask_sh),\ + TF_SF(DSCL0_SCL_HORZ_FILTER_SCALE_RATIO, SCL_H_SCALE_RATIO, mask_sh),\ + TF_SF(DSCL0_SCL_VERT_FILTER_SCALE_RATIO, SCL_V_SCALE_RATIO, mask_sh),\ + TF_SF(DSCL0_SCL_HORZ_FILTER_SCALE_RATIO_C, SCL_H_SCALE_RATIO_C, mask_sh),\ + TF_SF(DSCL0_SCL_VERT_FILTER_SCALE_RATIO_C, SCL_V_SCALE_RATIO_C, mask_sh),\ + TF_SF(DSCL0_SCL_HORZ_FILTER_INIT, SCL_H_INIT_FRAC, mask_sh),\ + TF_SF(DSCL0_SCL_HORZ_FILTER_INIT, SCL_H_INIT_INT, mask_sh),\ + TF_SF(DSCL0_SCL_HORZ_FILTER_INIT_C, SCL_H_INIT_FRAC_C, mask_sh),\ + TF_SF(DSCL0_SCL_HORZ_FILTER_INIT_C, SCL_H_INIT_INT_C, mask_sh),\ + TF_SF(DSCL0_SCL_VERT_FILTER_INIT, SCL_V_INIT_FRAC, mask_sh),\ + TF_SF(DSCL0_SCL_VERT_FILTER_INIT, SCL_V_INIT_INT, mask_sh),\ + TF_SF(DSCL0_SCL_VERT_FILTER_INIT_C, SCL_V_INIT_FRAC_C, mask_sh),\ + TF_SF(DSCL0_SCL_VERT_FILTER_INIT_C, SCL_V_INIT_INT_C, mask_sh),\ + TF_SF(DSCL0_SCL_MODE, SCL_CHROMA_COEF_MODE, mask_sh),\ + TF_SF(DSCL0_SCL_MODE, SCL_COEF_RAM_SELECT_CURRENT, mask_sh), \ + TF_SF(CNVC_CFG0_PRE_DEALPHA, PRE_DEALPHA_EN, mask_sh), \ + TF_SF(CNVC_CFG0_PRE_DEALPHA, PRE_DEALPHA_ABLND_EN, mask_sh), \ + TF_SF(CNVC_CFG0_PRE_REALPHA, PRE_REALPHA_EN, mask_sh), \ + TF_SF(CNVC_CFG0_PRE_REALPHA, PRE_REALPHA_ABLND_EN, mask_sh), \ + TF_SF(CNVC_CFG0_PRE_CSC_MODE, PRE_CSC_MODE, mask_sh), \ + TF_SF(CNVC_CFG0_PRE_CSC_MODE, PRE_CSC_MODE_CURRENT, mask_sh), \ + TF_SF(CNVC_CFG0_PRE_CSC_C11_C12, PRE_CSC_C11, mask_sh), \ + TF_SF(CNVC_CFG0_PRE_CSC_C11_C12, PRE_CSC_C12, mask_sh), \ + TF_SF(CNVC_CFG0_PRE_CSC_C33_C34, PRE_CSC_C33, mask_sh), \ + 
TF_SF(CNVC_CFG0_PRE_CSC_C33_C34, PRE_CSC_C34, mask_sh), \ + TF_SF(CM0_CM_POST_CSC_CONTROL, CM_POST_CSC_MODE, mask_sh), \ + TF_SF(CM0_CM_POST_CSC_CONTROL, CM_POST_CSC_MODE_CURRENT, mask_sh), \ + TF_SF(CM0_CM_POST_CSC_C11_C12, CM_POST_CSC_C11, mask_sh), \ + TF_SF(CM0_CM_POST_CSC_C11_C12, CM_POST_CSC_C12, mask_sh), \ + TF_SF(CM0_CM_POST_CSC_C33_C34, CM_POST_CSC_C33, mask_sh), \ + TF_SF(CM0_CM_POST_CSC_C33_C34, CM_POST_CSC_C34, mask_sh), \ + TF_SF(CM0_CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_INDEX, mask_sh), \ + TF_SF(CNVC_CFG0_FORMAT_CONTROL, CNVC_BYPASS, mask_sh), \ + TF2_SF(CNVC_CFG0, FORMAT_CONTROL__ALPHA_EN, mask_sh), \ + TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_EXPANSION_MODE, mask_sh), \ + TF_SF(CNVC_CFG0_CNVC_SURFACE_PIXEL_FORMAT, CNVC_SURFACE_PIXEL_FORMAT, mask_sh), \ + TF_SF(CNVC_CFG0_CNVC_SURFACE_PIXEL_FORMAT, CNVC_ALPHA_PLANE_ENABLE, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_MODE, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_EXPANSION_MODE, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_ENABLE, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_COLOR0, CUR0_COLOR0, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_COLOR1, CUR0_COLOR1, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_BIAS_G_Y, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_SCALE_G_Y, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_BIAS_RB_CRCB, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_SCALE_RB_CRCB, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_MODE, CUR0_MATRIX_MODE, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_MODE, CUR0_MATRIX_MODE_CURRENT, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_MODE, CUR0_MATRIX_COEF_FORMAT, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C11_C12_A, CUR0_MATRIX_C11_A, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C11_C12_A, CUR0_MATRIX_C12_A, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C13_C14_A, CUR0_MATRIX_C13_A, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C13_C14_A, CUR0_MATRIX_C14_A, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C21_C22_A, CUR0_MATRIX_C21_A, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C21_C22_A, CUR0_MATRIX_C22_A, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C23_C24_A, CUR0_MATRIX_C23_A, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C23_C24_A, CUR0_MATRIX_C24_A, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C31_C32_A, CUR0_MATRIX_C31_A, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C31_C32_A, CUR0_MATRIX_C32_A, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C33_C34_A, CUR0_MATRIX_C33_A, mask_sh), \ + TF_SF(CM_CUR0_CUR0_MATRIX_C33_C34_A, CUR0_MATRIX_C34_A, mask_sh), \ + TF_SF(DPP_TOP0_DPP_CONTROL, DPP_CLOCK_ENABLE, mask_sh), \ + TF_SF(CM0_CM_HDR_MULT_COEF, CM_HDR_MULT_COEF, mask_sh), \ + TF_SF(CM0_CM_CONTROL, CM_BYPASS, mask_sh), \ + TF_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_MODE, mask_sh), \ + TF_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_PITCH, mask_sh), \ + TF_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_LINES_PER_CHUNK, mask_sh), \ + TF_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_ENABLE, mask_sh), \ + TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_CNV16, mask_sh), \ + TF_SF(CNVC_CFG0_FORMAT_CONTROL, CNVC_BYPASS_MSB_ALIGN, mask_sh), \ + TF_SF(CNVC_CFG0_FORMAT_CONTROL, CLAMP_POSITIVE, mask_sh), \ + TF_SF(CNVC_CFG0_FORMAT_CONTROL, CLAMP_POSITIVE_C, mask_sh), \ + TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_CROSSBAR_R, mask_sh), \ + TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_CROSSBAR_G, mask_sh), \ + TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_CROSSBAR_B, mask_sh), \ + TF_SF(CNVC_CFG0_ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, mask_sh), \ + TF_SF(CNVC_CFG0_ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, mask_sh), \ + TF_SF(CNVC_CFG0_ALPHA_2BIT_LUT, ALPHA_2BIT_LUT2, 
mask_sh), \ + TF_SF(CNVC_CFG0_ALPHA_2BIT_LUT, ALPHA_2BIT_LUT3, mask_sh), \ + TF_SF(CNVC_CFG0_FCNV_FP_BIAS_R, FCNV_FP_BIAS_R, mask_sh), \ + TF_SF(CNVC_CFG0_FCNV_FP_BIAS_G, FCNV_FP_BIAS_G, mask_sh), \ + TF_SF(CNVC_CFG0_FCNV_FP_BIAS_B, FCNV_FP_BIAS_B, mask_sh), \ + TF_SF(CNVC_CFG0_FCNV_FP_SCALE_R, FCNV_FP_SCALE_R, mask_sh), \ + TF_SF(CNVC_CFG0_FCNV_FP_SCALE_G, FCNV_FP_SCALE_G, mask_sh), \ + TF_SF(CNVC_CFG0_FCNV_FP_SCALE_B, FCNV_FP_SCALE_B, mask_sh), \ + TF_SF(CNVC_CFG0_COLOR_KEYER_CONTROL, COLOR_KEYER_EN, mask_sh), \ + TF_SF(CNVC_CFG0_COLOR_KEYER_CONTROL, LUMA_KEYER_EN, mask_sh), \ + TF_SF(CNVC_CFG0_COLOR_KEYER_CONTROL, COLOR_KEYER_MODE, mask_sh), \ + TF_SF(CNVC_CFG0_COLOR_KEYER_ALPHA, COLOR_KEYER_ALPHA_LOW, mask_sh), \ + TF_SF(CNVC_CFG0_COLOR_KEYER_ALPHA, COLOR_KEYER_ALPHA_HIGH, mask_sh), \ + TF_SF(CNVC_CFG0_COLOR_KEYER_RED, COLOR_KEYER_RED_LOW, mask_sh), \ + TF_SF(CNVC_CFG0_COLOR_KEYER_RED, COLOR_KEYER_RED_HIGH, mask_sh), \ + TF_SF(CNVC_CFG0_COLOR_KEYER_GREEN, COLOR_KEYER_GREEN_LOW, mask_sh), \ + TF_SF(CNVC_CFG0_COLOR_KEYER_GREEN, COLOR_KEYER_GREEN_HIGH, mask_sh), \ + TF_SF(CNVC_CFG0_COLOR_KEYER_BLUE, COLOR_KEYER_BLUE_LOW, mask_sh), \ + TF_SF(CNVC_CFG0_COLOR_KEYER_BLUE, COLOR_KEYER_BLUE_HIGH, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_PIX_INV_MODE, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_PIXEL_ALPHA_MOD_EN, mask_sh), \ + TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_ROM_EN, mask_sh),\ + TF_SF(DSCL0_OBUF_MEM_PWR_CTRL, OBUF_MEM_PWR_FORCE, mask_sh),\ + TF_SF(DSCL0_DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, mask_sh),\ + TF_SF(DSCL0_DSCL_MEM_PWR_STATUS, LUT_MEM_PWR_STATE, mask_sh),\ + TF_SF(DSCL0_DSCL_SC_MODE, SCL_SC_MATRIX_MODE, mask_sh),\ + TF_SF(DSCL0_DSCL_SC_MODE, SCL_SC_LTONL_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_MODE, SCL_EASF_H_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_MODE, SCL_EASF_H_RINGEST_FORCE_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_MODE, SCL_EASF_H_2TAP_SHARP_FACTOR, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF1_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF2_MODE, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF3_MODE, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF2_FLAT1_GAIN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF2_FLAT2_GAIN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF2_ROC_GAIN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_RINGEST_EVENTAP_GAIN, SCL_EASF_H_RINGEST_EVENTAP_GAIN1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_RINGEST_EVENTAP_GAIN, SCL_EASF_H_RINGEST_EVENTAP_GAIN2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF_FINAL_MAX_MIN, SCL_EASF_H_BF_MAXA, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF_FINAL_MAX_MIN, SCL_EASF_H_BF_MAXB, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF_FINAL_MAX_MIN, SCL_EASF_H_BF_MINA, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF_FINAL_MAX_MIN, SCL_EASF_H_BF_MINB, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG0, SCL_EASF_H_BF1_PWL_IN_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG0, SCL_EASF_H_BF1_PWL_BASE_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG0, SCL_EASF_H_BF1_PWL_SLOPE_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG1, SCL_EASF_H_BF1_PWL_IN_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG1, SCL_EASF_H_BF1_PWL_BASE_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG1, SCL_EASF_H_BF1_PWL_SLOPE_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG2, 
SCL_EASF_H_BF1_PWL_IN_SEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG2, SCL_EASF_H_BF1_PWL_BASE_SEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG2, SCL_EASF_H_BF1_PWL_SLOPE_SEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG3, SCL_EASF_H_BF1_PWL_IN_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG3, SCL_EASF_H_BF1_PWL_BASE_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG3, SCL_EASF_H_BF1_PWL_SLOPE_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG4, SCL_EASF_H_BF1_PWL_IN_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG4, SCL_EASF_H_BF1_PWL_BASE_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG4, SCL_EASF_H_BF1_PWL_SLOPE_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG5, SCL_EASF_H_BF1_PWL_IN_SEG5, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG5, SCL_EASF_H_BF1_PWL_BASE_SEG5, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG5, SCL_EASF_H_BF1_PWL_SLOPE_SEG5, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG6, SCL_EASF_H_BF1_PWL_IN_SEG6, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG6, SCL_EASF_H_BF1_PWL_BASE_SEG6, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG6, SCL_EASF_H_BF1_PWL_SLOPE_SEG6, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG7, SCL_EASF_H_BF1_PWL_IN_SEG7, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG7, SCL_EASF_H_BF1_PWL_BASE_SEG7, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG0, SCL_EASF_H_BF3_PWL_IN_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG0, SCL_EASF_H_BF3_PWL_BASE_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG0, SCL_EASF_H_BF3_PWL_SLOPE_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG1, SCL_EASF_H_BF3_PWL_IN_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG1, SCL_EASF_H_BF3_PWL_BASE_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG1, SCL_EASF_H_BF3_PWL_SLOPE_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG2, SCL_EASF_H_BF3_PWL_IN_SEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG2, SCL_EASF_H_BF3_PWL_BASE_SEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG2, SCL_EASF_H_BF3_PWL_SLOPE_SEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG3, SCL_EASF_H_BF3_PWL_IN_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG3, SCL_EASF_H_BF3_PWL_BASE_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG3, SCL_EASF_H_BF3_PWL_SLOPE_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG4, SCL_EASF_H_BF3_PWL_IN_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG4, SCL_EASF_H_BF3_PWL_BASE_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG4, SCL_EASF_H_BF3_PWL_SLOPE_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG5, SCL_EASF_H_BF3_PWL_IN_SEG5, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG5, SCL_EASF_H_BF3_PWL_BASE_SEG5, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_MODE, SCL_EASF_V_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_MODE, SCL_EASF_V_RINGEST_FORCE_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_MODE, SCL_EASF_V_2TAP_SHARP_FACTOR, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF1_EN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF2_MODE, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF3_MODE, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF2_FLAT1_GAIN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF2_FLAT2_GAIN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF2_ROC_GAIN, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL1, SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL1, SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, mask_sh),\ + 
TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL2, SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL2, SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL3, SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL3, SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_RINGEST_EVENTAP_GAIN, SCL_EASF_V_RINGEST_EVENTAP_GAIN1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_RINGEST_EVENTAP_GAIN, SCL_EASF_V_RINGEST_EVENTAP_GAIN2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF_FINAL_MAX_MIN, SCL_EASF_V_BF_MAXA, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF_FINAL_MAX_MIN, SCL_EASF_V_BF_MAXB, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF_FINAL_MAX_MIN, SCL_EASF_V_BF_MINA, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF_FINAL_MAX_MIN, SCL_EASF_V_BF_MINB, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG0, SCL_EASF_V_BF1_PWL_IN_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG0, SCL_EASF_V_BF1_PWL_BASE_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG0, SCL_EASF_V_BF1_PWL_SLOPE_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG1, SCL_EASF_V_BF1_PWL_IN_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG1, SCL_EASF_V_BF1_PWL_BASE_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG1, SCL_EASF_V_BF1_PWL_SLOPE_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG2, SCL_EASF_V_BF1_PWL_IN_SEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG2, SCL_EASF_V_BF1_PWL_BASE_SEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG2, SCL_EASF_V_BF1_PWL_SLOPE_SEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG3, SCL_EASF_V_BF1_PWL_IN_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG3, SCL_EASF_V_BF1_PWL_BASE_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG3, SCL_EASF_V_BF1_PWL_SLOPE_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG4, SCL_EASF_V_BF1_PWL_IN_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG4, SCL_EASF_V_BF1_PWL_BASE_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG4, SCL_EASF_V_BF1_PWL_SLOPE_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG5, SCL_EASF_V_BF1_PWL_IN_SEG5, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG5, SCL_EASF_V_BF1_PWL_BASE_SEG5, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG5, SCL_EASF_V_BF1_PWL_SLOPE_SEG5, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG6, SCL_EASF_V_BF1_PWL_IN_SEG6, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG6, SCL_EASF_V_BF1_PWL_BASE_SEG6, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG6, SCL_EASF_V_BF1_PWL_SLOPE_SEG6, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG7, SCL_EASF_V_BF1_PWL_IN_SEG7, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG7, SCL_EASF_V_BF1_PWL_BASE_SEG7, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG0, SCL_EASF_V_BF3_PWL_IN_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG0, SCL_EASF_V_BF3_PWL_BASE_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG0, SCL_EASF_V_BF3_PWL_SLOPE_SEG0, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG1, SCL_EASF_V_BF3_PWL_IN_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG1, SCL_EASF_V_BF3_PWL_BASE_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG1, SCL_EASF_V_BF3_PWL_SLOPE_SEG1, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG2, SCL_EASF_V_BF3_PWL_IN_SEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG2, SCL_EASF_V_BF3_PWL_BASE_SEG2, 
mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG2, SCL_EASF_V_BF3_PWL_SLOPE_SEG2, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG3, SCL_EASF_V_BF3_PWL_IN_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG3, SCL_EASF_V_BF3_PWL_BASE_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG3, SCL_EASF_V_BF3_PWL_SLOPE_SEG3, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG4, SCL_EASF_V_BF3_PWL_IN_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG4, SCL_EASF_V_BF3_PWL_BASE_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG4, SCL_EASF_V_BF3_PWL_SLOPE_SEG4, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG5, SCL_EASF_V_BF3_PWL_IN_SEG5, mask_sh),\ + TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG5, SCL_EASF_V_BF3_PWL_BASE_SEG5, mask_sh),\ + TF_SF(DSCL0_DSCL_SC_MATRIX_C0C1, SCL_SC_MATRIX_C0, mask_sh),\ + TF_SF(DSCL0_DSCL_SC_MATRIX_C0C1, SCL_SC_MATRIX_C1, mask_sh),\ + TF_SF(DSCL0_DSCL_SC_MATRIX_C2C3, SCL_SC_MATRIX_C2, mask_sh),\ + TF_SF(DSCL0_DSCL_SC_MATRIX_C2C3, SCL_SC_MATRIX_C3, mask_sh),\ + TF_SF(DSCL0_ISHARP_DELTA_CTRL, ISHARP_DELTA_LUT_HOST_SELECT, mask_sh),\ + TF_SF(DSCL0_ISHARP_DELTA_DATA, ISHARP_DELTA_DATA, mask_sh),\ + TF_SF(DSCL0_ISHARP_DELTA_INDEX, ISHARP_DELTA_INDEX, mask_sh),\ + TF_SF(DSCL0_ISHARP_MODE, ISHARP_EN, mask_sh),\ + TF_SF(DSCL0_ISHARP_MODE, ISHARP_NOISEDET_EN, mask_sh),\ + TF_SF(DSCL0_ISHARP_MODE, ISHARP_NOISEDET_MODE, mask_sh),\ + TF_SF(DSCL0_ISHARP_MODE, ISHARP_LBA_MODE, mask_sh),\ + TF_SF(DSCL0_ISHARP_MODE, ISHARP_DELTA_LUT_SELECT, mask_sh),\ + TF_SF(DSCL0_ISHARP_MODE, ISHARP_FMT_MODE, mask_sh),\ + TF_SF(DSCL0_ISHARP_MODE, ISHARP_FMT_NORM, mask_sh),\ + TF_SF(DSCL0_ISHARP_MODE, ISHARP_DELTA_LUT_SELECT_CURRENT, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG0, ISHARP_LBA_PWL_IN_SEG0, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG0, ISHARP_LBA_PWL_BASE_SEG0, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG0, ISHARP_LBA_PWL_SLOPE_SEG0, mask_sh), \ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG1, ISHARP_LBA_PWL_IN_SEG1, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG1, ISHARP_LBA_PWL_BASE_SEG1, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG1, ISHARP_LBA_PWL_SLOPE_SEG1, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG2, ISHARP_LBA_PWL_IN_SEG2, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG2, ISHARP_LBA_PWL_BASE_SEG2, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG2, ISHARP_LBA_PWL_SLOPE_SEG2, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG3, ISHARP_LBA_PWL_IN_SEG3, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG3, ISHARP_LBA_PWL_BASE_SEG3, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG3, ISHARP_LBA_PWL_SLOPE_SEG3, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG4, ISHARP_LBA_PWL_IN_SEG4, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG4, ISHARP_LBA_PWL_BASE_SEG4, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG4, ISHARP_LBA_PWL_SLOPE_SEG4, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG5, ISHARP_LBA_PWL_IN_SEG5, mask_sh),\ + TF_SF(DSCL0_ISHARP_LBA_PWL_SEG5, ISHARP_LBA_PWL_BASE_SEG5, mask_sh),\ + TF_SF(DSCL0_ISHARP_NOISEDET_THRESHOLD, ISHARP_NOISEDET_UTHRE, mask_sh),\ + TF_SF(DSCL0_ISHARP_NOISEDET_THRESHOLD, ISHARP_NOISEDET_DTHRE, mask_sh), \ + TF_SF(DSCL0_ISHARP_NOISE_GAIN_PWL, ISHARP_NOISEDET_PWL_START_IN, mask_sh), \ + TF_SF(DSCL0_ISHARP_NOISE_GAIN_PWL, ISHARP_NOISEDET_PWL_END_IN, mask_sh), \ + TF_SF(DSCL0_ISHARP_NOISE_GAIN_PWL, ISHARP_NOISEDET_PWL_SLOPE, mask_sh), \ + TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_EN_P, mask_sh), \ + TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_PIVOT_P, mask_sh), \ + TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_SLOPE_P, mask_sh), \ + TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, 
ISHARP_NLDELTA_SCLIP_EN_N, mask_sh), \ + TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_PIVOT_N, mask_sh), \ + TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_SLOPE_N, mask_sh) + +#define DPP_REG_FIELD_LIST_DCN401(type) \ + DPP_REG_FIELD_LIST_DCN3(type); \ + type CUR0_FP_BIAS_G_Y; \ + type CUR0_FP_SCALE_G_Y; \ + type CUR0_FP_BIAS_RB_CRCB; \ + type CUR0_FP_SCALE_RB_CRCB; \ + type CUR0_MATRIX_MODE; \ + type CUR0_MATRIX_MODE_CURRENT; \ + type CUR0_MATRIX_COEF_FORMAT; \ + type CUR0_MATRIX_C11_A; \ + type CUR0_MATRIX_C12_A; \ + type CUR0_MATRIX_C13_A; \ + type CUR0_MATRIX_C14_A; \ + type CUR0_MATRIX_C21_A; \ + type CUR0_MATRIX_C22_A; \ + type CUR0_MATRIX_C23_A; \ + type CUR0_MATRIX_C24_A; \ + type CUR0_MATRIX_C31_A; \ + type CUR0_MATRIX_C32_A; \ + type CUR0_MATRIX_C33_A; \ + type CUR0_MATRIX_C34_A; \ + type LUMA_KEYER_EN; \ + type SCL_SC_MATRIX_MODE; \ + type SCL_SC_LTONL_EN; \ + type SCL_EASF_H_EN; \ + type SCL_EASF_H_RINGEST_FORCE_EN; \ + type SCL_EASF_H_2TAP_SHARP_FACTOR; \ + type SCL_EASF_H_BF1_EN; \ + type SCL_EASF_H_BF2_MODE; \ + type SCL_EASF_H_BF3_MODE; \ + type SCL_EASF_H_BF2_FLAT1_GAIN; \ + type SCL_EASF_H_BF2_FLAT2_GAIN; \ + type SCL_EASF_H_BF2_ROC_GAIN; \ + type SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1; \ + type SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2; \ + type SCL_EASF_H_RINGEST_EVENTAP_GAIN1; \ + type SCL_EASF_H_RINGEST_EVENTAP_GAIN2; \ + type SCL_EASF_H_BF_MAXA; \ + type SCL_EASF_H_BF_MAXB; \ + type SCL_EASF_H_BF_MINA; \ + type SCL_EASF_H_BF_MINB; \ + type SCL_EASF_H_BF1_PWL_IN_SEG0; \ + type SCL_EASF_H_BF1_PWL_BASE_SEG0; \ + type SCL_EASF_H_BF1_PWL_SLOPE_SEG0; \ + type SCL_EASF_H_BF1_PWL_IN_SEG1; \ + type SCL_EASF_H_BF1_PWL_BASE_SEG1; \ + type SCL_EASF_H_BF1_PWL_SLOPE_SEG1; \ + type SCL_EASF_H_BF1_PWL_IN_SEG2; \ + type SCL_EASF_H_BF1_PWL_BASE_SEG2; \ + type SCL_EASF_H_BF1_PWL_SLOPE_SEG2; \ + type SCL_EASF_H_BF1_PWL_IN_SEG3; \ + type SCL_EASF_H_BF1_PWL_BASE_SEG3; \ + type SCL_EASF_H_BF1_PWL_SLOPE_SEG3; \ + type SCL_EASF_H_BF1_PWL_IN_SEG4; \ + type SCL_EASF_H_BF1_PWL_BASE_SEG4; \ + type SCL_EASF_H_BF1_PWL_SLOPE_SEG4; \ + type SCL_EASF_H_BF1_PWL_IN_SEG5; \ + type SCL_EASF_H_BF1_PWL_BASE_SEG5; \ + type SCL_EASF_H_BF1_PWL_SLOPE_SEG5; \ + type SCL_EASF_H_BF1_PWL_IN_SEG6; \ + type SCL_EASF_H_BF1_PWL_BASE_SEG6; \ + type SCL_EASF_H_BF1_PWL_SLOPE_SEG6; \ + type SCL_EASF_H_BF1_PWL_IN_SEG7; \ + type SCL_EASF_H_BF1_PWL_BASE_SEG7; \ + type SCL_EASF_H_BF3_PWL_IN_SEG0; \ + type SCL_EASF_H_BF3_PWL_BASE_SEG0; \ + type SCL_EASF_H_BF3_PWL_SLOPE_SEG0; \ + type SCL_EASF_H_BF3_PWL_IN_SEG1; \ + type SCL_EASF_H_BF3_PWL_BASE_SEG1; \ + type SCL_EASF_H_BF3_PWL_SLOPE_SEG1; \ + type SCL_EASF_H_BF3_PWL_IN_SEG2; \ + type SCL_EASF_H_BF3_PWL_BASE_SEG2; \ + type SCL_EASF_H_BF3_PWL_SLOPE_SEG2; \ + type SCL_EASF_H_BF3_PWL_IN_SEG3; \ + type SCL_EASF_H_BF3_PWL_BASE_SEG3; \ + type SCL_EASF_H_BF3_PWL_SLOPE_SEG3; \ + type SCL_EASF_H_BF3_PWL_IN_SEG4; \ + type SCL_EASF_H_BF3_PWL_BASE_SEG4; \ + type SCL_EASF_H_BF3_PWL_SLOPE_SEG4; \ + type SCL_EASF_H_BF3_PWL_IN_SEG5; \ + type SCL_EASF_H_BF3_PWL_BASE_SEG5; \ + type SCL_EASF_V_EN; \ + type SCL_EASF_V_RINGEST_FORCE_EN; \ + type SCL_EASF_V_2TAP_SHARP_FACTOR; \ + type SCL_EASF_V_BF1_EN; \ + type SCL_EASF_V_BF2_MODE; \ + type SCL_EASF_V_BF3_MODE; \ + type SCL_EASF_V_BF2_FLAT1_GAIN; \ + type SCL_EASF_V_BF2_FLAT2_GAIN; \ + type SCL_EASF_V_BF2_ROC_GAIN; \ + type SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT; \ + type SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL; \ + type SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE; \ + type SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE; \ + type 
SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE; \ + type SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET; \ + type SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1; \ + type SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2; \ + type SCL_EASF_V_RINGEST_EVENTAP_GAIN1; \ + type SCL_EASF_V_RINGEST_EVENTAP_GAIN2; \ + type SCL_EASF_V_BF_MAXA; \ + type SCL_EASF_V_BF_MAXB; \ + type SCL_EASF_V_BF_MINA; \ + type SCL_EASF_V_BF_MINB; \ + type SCL_EASF_V_BF1_PWL_IN_SEG0; \ + type SCL_EASF_V_BF1_PWL_BASE_SEG0; \ + type SCL_EASF_V_BF1_PWL_SLOPE_SEG0; \ + type SCL_EASF_V_BF1_PWL_IN_SEG1; \ + type SCL_EASF_V_BF1_PWL_BASE_SEG1; \ + type SCL_EASF_V_BF1_PWL_SLOPE_SEG1; \ + type SCL_EASF_V_BF1_PWL_IN_SEG2; \ + type SCL_EASF_V_BF1_PWL_BASE_SEG2; \ + type SCL_EASF_V_BF1_PWL_SLOPE_SEG2; \ + type SCL_EASF_V_BF1_PWL_IN_SEG3; \ + type SCL_EASF_V_BF1_PWL_BASE_SEG3; \ + type SCL_EASF_V_BF1_PWL_SLOPE_SEG3; \ + type SCL_EASF_V_BF1_PWL_IN_SEG4; \ + type SCL_EASF_V_BF1_PWL_BASE_SEG4; \ + type SCL_EASF_V_BF1_PWL_SLOPE_SEG4; \ + type SCL_EASF_V_BF1_PWL_IN_SEG5; \ + type SCL_EASF_V_BF1_PWL_BASE_SEG5; \ + type SCL_EASF_V_BF1_PWL_SLOPE_SEG5; \ + type SCL_EASF_V_BF1_PWL_IN_SEG6; \ + type SCL_EASF_V_BF1_PWL_BASE_SEG6; \ + type SCL_EASF_V_BF1_PWL_SLOPE_SEG6; \ + type SCL_EASF_V_BF1_PWL_IN_SEG7; \ + type SCL_EASF_V_BF1_PWL_BASE_SEG7; \ + type SCL_EASF_V_BF3_PWL_IN_SEG0; \ + type SCL_EASF_V_BF3_PWL_BASE_SEG0; \ + type SCL_EASF_V_BF3_PWL_SLOPE_SEG0; \ + type SCL_EASF_V_BF3_PWL_IN_SEG1; \ + type SCL_EASF_V_BF3_PWL_BASE_SEG1; \ + type SCL_EASF_V_BF3_PWL_SLOPE_SEG1; \ + type SCL_EASF_V_BF3_PWL_IN_SEG2; \ + type SCL_EASF_V_BF3_PWL_BASE_SEG2; \ + type SCL_EASF_V_BF3_PWL_SLOPE_SEG2; \ + type SCL_EASF_V_BF3_PWL_IN_SEG3; \ + type SCL_EASF_V_BF3_PWL_BASE_SEG3; \ + type SCL_EASF_V_BF3_PWL_SLOPE_SEG3; \ + type SCL_EASF_V_BF3_PWL_IN_SEG4; \ + type SCL_EASF_V_BF3_PWL_BASE_SEG4; \ + type SCL_EASF_V_BF3_PWL_SLOPE_SEG4; \ + type SCL_EASF_V_BF3_PWL_IN_SEG5; \ + type SCL_EASF_V_BF3_PWL_BASE_SEG5; \ + type SCL_SC_MATRIX_C0; \ + type SCL_SC_MATRIX_C1; \ + type SCL_SC_MATRIX_C2; \ + type SCL_SC_MATRIX_C3; \ + type ISHARP_EN; \ + type ISHARP_NOISEDET_EN; \ + type ISHARP_NOISEDET_MODE; \ + type ISHARP_NOISEDET_UTHRE; \ + type ISHARP_NOISEDET_DTHRE; \ + type ISHARP_NOISEDET_PWL_START_IN; \ + type ISHARP_NOISEDET_PWL_END_IN; \ + type ISHARP_NOISEDET_PWL_SLOPE; \ + type ISHARP_LBA_MODE; \ + type ISHARP_LBA_PWL_IN_SEG0; \ + type ISHARP_LBA_PWL_BASE_SEG0; \ + type ISHARP_LBA_PWL_SLOPE_SEG0; \ + type ISHARP_LBA_PWL_IN_SEG1; \ + type ISHARP_LBA_PWL_BASE_SEG1; \ + type ISHARP_LBA_PWL_SLOPE_SEG1; \ + type ISHARP_LBA_PWL_IN_SEG2; \ + type ISHARP_LBA_PWL_BASE_SEG2; \ + type ISHARP_LBA_PWL_SLOPE_SEG2; \ + type ISHARP_LBA_PWL_IN_SEG3; \ + type ISHARP_LBA_PWL_BASE_SEG3; \ + type ISHARP_LBA_PWL_SLOPE_SEG3; \ + type ISHARP_LBA_PWL_IN_SEG4; \ + type ISHARP_LBA_PWL_BASE_SEG4; \ + type ISHARP_LBA_PWL_SLOPE_SEG4; \ + type ISHARP_LBA_PWL_IN_SEG5; \ + type ISHARP_LBA_PWL_BASE_SEG5; \ + type ISHARP_FMT_MODE; \ + type ISHARP_FMT_NORM; \ + type ISHARP_DELTA_LUT_SELECT; \ + type ISHARP_DELTA_LUT_SELECT_CURRENT; \ + type ISHARP_DELTA_LUT_HOST_SELECT; \ + type ISHARP_DELTA_DATA; \ + type ISHARP_DELTA_INDEX; \ + type ISHARP_NLDELTA_SCLIP_EN_P; \ + type ISHARP_NLDELTA_SCLIP_PIVOT_P; \ + type ISHARP_NLDELTA_SCLIP_SLOPE_P; \ + type ISHARP_NLDELTA_SCLIP_EN_N; \ + type ISHARP_NLDELTA_SCLIP_PIVOT_N; \ + type ISHARP_NLDELTA_SCLIP_SLOPE_N + +struct dcn401_dpp_registers { + DPP_DCN3_REG_VARIABLE_LIST_COMMON; + uint32_t CURSOR0_FP_SCALE_BIAS_G_Y; + uint32_t CURSOR0_FP_SCALE_BIAS_RB_CRCB; + uint32_t CUR0_MATRIX_MODE; + uint32_t 
CUR0_MATRIX_C11_C12_A; + uint32_t CUR0_MATRIX_C13_C14_A; + uint32_t CUR0_MATRIX_C21_C22_A; + uint32_t CUR0_MATRIX_C23_C24_A; + uint32_t CUR0_MATRIX_C31_C32_A; + uint32_t CUR0_MATRIX_C33_C34_A; + uint32_t CUR0_MATRIX_C11_C12_B; + uint32_t CUR0_MATRIX_C13_C14_B; + uint32_t CUR0_MATRIX_C21_C22_B; + uint32_t CUR0_MATRIX_C23_C24_B; + uint32_t CUR0_MATRIX_C31_C32_B; + uint32_t CUR0_MATRIX_C33_C34_B; + uint32_t DSCL_SC_MODE; + uint32_t DSCL_EASF_H_MODE; + uint32_t DSCL_EASF_H_BF_CNTL; + uint32_t DSCL_EASF_H_RINGEST_EVENTAP_REDUCE; + uint32_t DSCL_EASF_H_RINGEST_EVENTAP_GAIN; + uint32_t DSCL_EASF_H_BF_FINAL_MAX_MIN; + uint32_t DSCL_EASF_H_BF1_PWL_SEG0; + uint32_t DSCL_EASF_H_BF1_PWL_SEG1; + uint32_t DSCL_EASF_H_BF1_PWL_SEG2; + uint32_t DSCL_EASF_H_BF1_PWL_SEG3; + uint32_t DSCL_EASF_H_BF1_PWL_SEG4; + uint32_t DSCL_EASF_H_BF1_PWL_SEG5; + uint32_t DSCL_EASF_H_BF1_PWL_SEG6; + uint32_t DSCL_EASF_H_BF1_PWL_SEG7; + uint32_t DSCL_EASF_H_BF3_PWL_SEG0; + uint32_t DSCL_EASF_H_BF3_PWL_SEG1; + uint32_t DSCL_EASF_H_BF3_PWL_SEG2; + uint32_t DSCL_EASF_H_BF3_PWL_SEG3; + uint32_t DSCL_EASF_H_BF3_PWL_SEG4; + uint32_t DSCL_EASF_H_BF3_PWL_SEG5; + uint32_t DSCL_EASF_V_MODE; + uint32_t DSCL_EASF_V_BF_CNTL; + uint32_t DSCL_EASF_V_RINGEST_3TAP_CNTL1; + uint32_t DSCL_EASF_V_RINGEST_3TAP_CNTL2; + uint32_t DSCL_EASF_V_RINGEST_3TAP_CNTL3; + uint32_t DSCL_EASF_V_RINGEST_EVENTAP_REDUCE; + uint32_t DSCL_EASF_V_RINGEST_EVENTAP_GAIN; + uint32_t DSCL_EASF_V_BF_FINAL_MAX_MIN; + uint32_t DSCL_EASF_V_BF1_PWL_SEG0; + uint32_t DSCL_EASF_V_BF1_PWL_SEG1; + uint32_t DSCL_EASF_V_BF1_PWL_SEG2; + uint32_t DSCL_EASF_V_BF1_PWL_SEG3; + uint32_t DSCL_EASF_V_BF1_PWL_SEG4; + uint32_t DSCL_EASF_V_BF1_PWL_SEG5; + uint32_t DSCL_EASF_V_BF1_PWL_SEG6; + uint32_t DSCL_EASF_V_BF1_PWL_SEG7; + uint32_t DSCL_EASF_V_BF3_PWL_SEG0; + uint32_t DSCL_EASF_V_BF3_PWL_SEG1; + uint32_t DSCL_EASF_V_BF3_PWL_SEG2; + uint32_t DSCL_EASF_V_BF3_PWL_SEG3; + uint32_t DSCL_EASF_V_BF3_PWL_SEG4; + uint32_t DSCL_EASF_V_BF3_PWL_SEG5; + uint32_t DSCL_SC_MATRIX_C0C1; + uint32_t DSCL_SC_MATRIX_C2C3; + uint32_t ISHARP_MODE; + uint32_t ISHARP_NOISEDET_THRESHOLD; + uint32_t ISHARP_NOISE_GAIN_PWL; + uint32_t ISHARP_LBA_PWL_SEG0; + uint32_t ISHARP_LBA_PWL_SEG1; + uint32_t ISHARP_LBA_PWL_SEG2; + uint32_t ISHARP_LBA_PWL_SEG3; + uint32_t ISHARP_LBA_PWL_SEG4; + uint32_t ISHARP_LBA_PWL_SEG5; + uint32_t ISHARP_DELTA_CTRL; + uint32_t ISHARP_DELTA_DATA; + uint32_t ISHARP_DELTA_INDEX; + uint32_t ISHARP_NLDELTA_SOFT_CLIP; +}; + +struct dcn401_dpp_shift { + DPP_REG_FIELD_LIST_DCN401(uint8_t); +}; + +struct dcn401_dpp_mask { + DPP_REG_FIELD_LIST_DCN401(uint32_t); +}; + +struct dcn401_dpp { + struct dpp base; + + const struct dcn401_dpp_registers *tf_regs; + const struct dcn401_dpp_shift *tf_shift; + const struct dcn401_dpp_mask *tf_mask; + + const uint16_t *filter_v; + const uint16_t *filter_h; + const uint16_t *filter_v_c; + const uint16_t *filter_h_c; + int lb_pixel_depth_supported; + int lb_memory_size; + int lb_bits_per_entry; + bool is_write_to_ram_a_safe; + struct scaler_data scl_data; + struct pwl_params pwl_data; +}; + +bool dpp401_construct(struct dcn401_dpp *dpp401, + struct dc_context *ctx, + uint32_t inst, + const struct dcn401_dpp_registers *tf_regs, + const struct dcn401_dpp_shift *tf_shift, + const struct dcn401_dpp_mask *tf_mask); + +void dpp401_dscl_set_scaler_manual_scale( + struct dpp *dpp_base, + const struct scaler_data *scl_data); + +void dpp401_full_bypass(struct dpp *dpp_base); + +void dpp401_dpp_setup( + struct dpp *dpp_base, + enum surface_pixel_format format, + enum 
expansion_mode mode, + struct dc_csc_transform input_csc_color_matrix, + enum dc_color_space input_color_space, + struct cnv_alpha_2bit_lut *alpha_2bit_lut); + +void dpp401_set_cursor_attributes( + struct dpp *dpp_base, + struct dc_cursor_attributes *cursor_attributes); + +void dpp401_set_cursor_position( + struct dpp *dpp_base, + const struct dc_cursor_position *pos, + const struct dc_cursor_mi_param *param, + uint32_t width, + uint32_t height); + +void dpp401_set_optional_cursor_attributes( + struct dpp *dpp_base, + struct dpp_cursor_attributes *attr); + +void dscl401_calc_lb_num_partitions( + const struct scaler_data *scl_data, + enum lb_memory_config lb_config, + int *num_part_y, + int *num_part_c); + +void dscl401_spl_calc_lb_num_partitions( + bool alpha_en, + const struct spl_scaler_data *scl_data, + enum lb_memory_config lb_config, + int *num_part_y, + int *num_part_c); + +void dpp401_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s); + +void dpp401_set_cursor_matrix( + struct dpp *dpp_base, + enum dc_color_space color_space, + struct dc_csc_transform cursor_csc_color_matrix); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c new file mode 100644 index 000000000000..a54b9089f15d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -0,0 +1,303 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dm_services.h" + +#include "core_types.h" + +#include "reg_helper.h" +#include "dcn401/dcn401_dpp.h" +#include "basics/conversion.h" +#include "dcn10/dcn10_cm_common.h" + +#define NUM_PHASES 64 +#define HORZ_MAX_TAPS 8 +#define VERT_MAX_TAPS 8 + +#define BLACK_OFFSET_RGB_Y 0x0 +#define BLACK_OFFSET_CBCR 0x8000 + +#define REG(reg)\ + dpp->tf_regs->reg + +#define CTX \ + dpp->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + dpp->tf_shift->field_name, dpp->tf_mask->field_name + +#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) + + +enum dcn401_coef_filter_type_sel { + SCL_COEF_LUMA_VERT_FILTER = 0, + SCL_COEF_LUMA_HORZ_FILTER = 1, + SCL_COEF_CHROMA_VERT_FILTER = 2, + SCL_COEF_CHROMA_HORZ_FILTER = 3, + SCL_COEF_SC_VERT_FILTER = 4, + SCL_COEF_SC_HORZ_FILTER = 5 +}; + +enum dscl_autocal_mode { + AUTOCAL_MODE_OFF = 0, + + /* Autocal calculates the scaling ratio and initial phase, and + * DSCL_MODE_SEL must be set to 1 + */ + AUTOCAL_MODE_AUTOSCALE = 1, + /* Autocal performs auto centering without replication, and + * DSCL_MODE_SEL must be set to 0 + */ + AUTOCAL_MODE_AUTOCENTER = 2, + /* Autocal performs auto centering and auto replication, and + * DSCL_MODE_SEL must be set to 0 + */ + AUTOCAL_MODE_AUTOREPLICATE = 3 +}; + +enum dscl_mode_sel { + DSCL_MODE_SCALING_444_BYPASS = 0, + DSCL_MODE_SCALING_444_RGB_ENABLE = 1, + DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2, + DSCL_MODE_SCALING_YCBCR_ENABLE = 3, + DSCL_MODE_LUMA_SCALING_BYPASS = 4, + DSCL_MODE_CHROMA_SCALING_BYPASS = 5, + DSCL_MODE_DSCL_BYPASS = 6 +}; + +void dpp401_full_bypass(struct dpp *dpp_base) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + /* Input pixel format: ARGB8888 */ + REG_SET(CNVC_SURFACE_PIXEL_FORMAT, 0, + CNVC_SURFACE_PIXEL_FORMAT, 0x8); + + /* Zero expansion */ + REG_SET_3(FORMAT_CONTROL, 0, + CNVC_BYPASS, 0, + FORMAT_CONTROL__ALPHA_EN, 0, + FORMAT_EXPANSION_MODE, 0); + + /* COLOR_KEYER_CONTROL.COLOR_KEYER_EN = 0 should be the default */ + if (dpp->tf_mask->CM_BYPASS_EN) + REG_SET(CM_CONTROL, 0, CM_BYPASS_EN, 1); + else + REG_SET(CM_CONTROL, 0, CM_BYPASS, 1); + + /* Setting degamma bypass for now */ + REG_SET(CM_DGAM_CONTROL, 0, CM_DGAM_LUT_MODE, 0); +} + +void dpp401_set_cursor_attributes( + struct dpp *dpp_base, + struct dc_cursor_attributes *cursor_attributes) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + enum dc_cursor_color_format color_format = cursor_attributes->color_format; + int cur_rom_en = 0; + + if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || + color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { + if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { + cur_rom_en = 1; + } + } + + REG_UPDATE_3(CURSOR0_CONTROL, + CUR0_MODE, color_format, + CUR0_EXPANSION_MODE, 0, + CUR0_ROM_EN, cur_rom_en); + + if (color_format == CURSOR_MODE_MONO) { + /* todo: clarify what to program these to */ + REG_UPDATE(CURSOR0_COLOR0, + CUR0_COLOR0, 0x00000000); + REG_UPDATE(CURSOR0_COLOR1, + CUR0_COLOR1, 0xFFFFFFFF); + } + + dpp_base->att.cur0_ctl.bits.expansion_mode = 0; + dpp_base->att.cur0_ctl.bits.cur0_rom_en = cur_rom_en; + dpp_base->att.cur0_ctl.bits.mode = color_format; +} + +void dpp401_set_cursor_position( + struct dpp *dpp_base, + const struct dc_cursor_position *pos, + const struct dc_cursor_mi_param *param, + uint32_t width, + uint32_t height) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + int x_pos = pos->x - param->recout.x; + int y_pos = pos->y - param->recout.y; + int x_hotspot = 
pos->x_hotspot; + int y_hotspot = pos->y_hotspot; + int rec_x_offset = x_pos - pos->x_hotspot; + int rec_y_offset = y_pos - pos->y_hotspot; + int cursor_height = (int)height; + int cursor_width = (int)width; + uint32_t cur_en = pos->enable ? 1 : 0; + + // Transform cursor width / height and hotspots for offset calculations + if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { + swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + + if (param->rotation == ROTATION_ANGLE_90) { + // hotspot = (-y, x) + rec_x_offset = x_pos - (cursor_width - x_hotspot); + rec_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + rec_x_offset = x_pos - x_hotspot; + rec_y_offset = y_pos - (cursor_height - y_hotspot); + } + } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) + if (!param->mirror) + rec_x_offset = x_pos - (cursor_width - x_hotspot); + + rec_y_offset = y_pos - (cursor_height - y_hotspot); + } + + if (rec_x_offset >= (int)param->recout.width) + cur_en = 0; /* not visible beyond right edge*/ + + if (rec_x_offset + cursor_width <= 0) + cur_en = 0; /* not visible beyond left edge*/ + + if (rec_y_offset >= (int)param->recout.height) + cur_en = 0; /* not visible beyond bottom edge*/ + + if (rec_y_offset + cursor_height <= 0) + cur_en = 0; /* not visible beyond top edge*/ + + REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en); + + dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en; +} + +void dpp401_set_optional_cursor_attributes( + struct dpp *dpp_base, + struct dpp_cursor_attributes *attr) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + if (attr) { + REG_UPDATE(CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_BIAS_G_Y, attr->bias); + REG_UPDATE(CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_SCALE_G_Y, attr->scale); + REG_UPDATE(CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_BIAS_RB_CRCB, attr->bias); + REG_UPDATE(CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_SCALE_RB_CRCB, attr->scale); + } +} + +/* Program Cursor matrix block in DPP CM */ +static void dpp401_program_cursor_csc( + struct dpp *dpp_base, + enum dc_color_space color_space, + const struct dpp_input_csc_matrix *tbl_entry) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + uint32_t mode_select = 0; + struct color_matrices_reg cur_matrix_regs; + unsigned int i; + const uint16_t *regval = NULL; + int arr_size = sizeof(dpp_input_csc_matrix) / sizeof(struct dpp_input_csc_matrix); + + if (color_space < COLOR_SPACE_YCBCR601) { + REG_SET(CUR0_MATRIX_MODE, 0, CUR0_MATRIX_MODE, CUR_MATRIX_BYPASS); + return; + } + + /* If adjustments not provided use hardcoded table for color space conversion */ + if (tbl_entry == NULL) { + + for (i = 0; i < arr_size; i++) + if (dpp_input_csc_matrix[i].color_space == color_space) { + regval = dpp_input_csc_matrix[i].regval; + break; + } + + if (regval == NULL) { + BREAK_TO_DEBUGGER(); + REG_SET(CUR0_MATRIX_MODE, 0, CUR0_MATRIX_MODE, CUR_MATRIX_BYPASS); + return; + } + } else { + regval = tbl_entry->regval; + } + + REG_GET(CUR0_MATRIX_MODE, CUR0_MATRIX_MODE_CURRENT, &mode_select); + + //If current set in use not set A, then use set A, otherwise use set B + if (mode_select != CUR_MATRIX_SET_A) + mode_select = CUR_MATRIX_SET_A; + else + mode_select = CUR_MATRIX_SET_B; + + cur_matrix_regs.shifts.csc_c11 = dpp->tf_shift->CUR0_MATRIX_C11_A; + cur_matrix_regs.masks.csc_c11 = dpp->tf_mask->CUR0_MATRIX_C11_A; + cur_matrix_regs.shifts.csc_c12 = dpp->tf_shift->CUR0_MATRIX_C12_A; + cur_matrix_regs.masks.csc_c12 = 
dpp->tf_mask->CUR0_MATRIX_C12_A; + + if (mode_select == CUR_MATRIX_SET_A) { + cur_matrix_regs.csc_c11_c12 = REG(CUR0_MATRIX_C11_C12_A); + cur_matrix_regs.csc_c33_c34 = REG(CUR0_MATRIX_C33_C34_A); + } else { + cur_matrix_regs.csc_c11_c12 = REG(CUR0_MATRIX_C11_C12_B); + cur_matrix_regs.csc_c33_c34 = REG(CUR0_MATRIX_C33_C34_B); + } + + cm_helper_program_color_matrices( + dpp->base.ctx, + regval, + &cur_matrix_regs); + + //select coefficient set to use + REG_SET(CUR0_MATRIX_MODE, 0, CUR0_MATRIX_MODE, mode_select); +} + +/* Program Cursor matrix block in DPP CM */ +void dpp401_set_cursor_matrix( + struct dpp *dpp_base, + enum dc_color_space color_space, + struct dc_csc_transform cursor_csc_color_matrix) +{ + struct dpp_input_csc_matrix cursor_tbl_entry; + unsigned int i; + + if (cursor_csc_color_matrix.enable_adjustment == true) { + for (i = 0; i < 12; i++) + cursor_tbl_entry.regval[i] = cursor_csc_color_matrix.matrix[i]; + + cursor_tbl_entry.color_space = color_space; + dpp401_program_cursor_csc(dpp_base, color_space, &cursor_tbl_entry); + } else { + dpp401_program_cursor_csc(dpp_base, color_space, NULL); + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c new file mode 100644 index 000000000000..c20376083441 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -0,0 +1,968 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dm_services.h" + +#include "core_types.h" + +#include "reg_helper.h" +#include "dcn401/dcn401_dpp.h" +#include "basics/conversion.h" + + +#define NUM_PHASES 64 +#define HORZ_MAX_TAPS 8 +#define VERT_MAX_TAPS 8 +#define NUM_LEVELS 32 +#define BLACK_OFFSET_RGB_Y 0x0 +#define BLACK_OFFSET_CBCR 0x8000 + + +#define REG(reg)\ + dpp->tf_regs->reg + +#define CTX \ + dpp->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + dpp->tf_shift->field_name, dpp->tf_mask->field_name + +enum dcn401_coef_filter_type_sel { + SCL_COEF_LUMA_VERT_FILTER = 0, + SCL_COEF_LUMA_HORZ_FILTER = 1, + SCL_COEF_CHROMA_VERT_FILTER = 2, + SCL_COEF_CHROMA_HORZ_FILTER = 3, + SCL_COEF_ALPHA_VERT_FILTER = 4, + SCL_COEF_ALPHA_HORZ_FILTER = 5 +}; + +enum dscl_autocal_mode { + AUTOCAL_MODE_OFF = 0, + + /* Autocal calculate the scaling ratio and initial phase and the + * DSCL_MODE_SEL must be set to 1 + */ + AUTOCAL_MODE_AUTOSCALE = 1, + /* Autocal perform auto centering without replication and the + * DSCL_MODE_SEL must be set to 0 + */ + AUTOCAL_MODE_AUTOCENTER = 2, + /* Autocal perform auto centering and auto replication and the + * DSCL_MODE_SEL must be set to 0 + */ + AUTOCAL_MODE_AUTOREPLICATE = 3 +}; + +enum dscl_mode_sel { + DSCL_MODE_SCALING_444_BYPASS = 0, + DSCL_MODE_SCALING_444_RGB_ENABLE = 1, + DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2, + DSCL_MODE_SCALING_420_YCBCR_ENABLE = 3, + DSCL_MODE_SCALING_420_LUMA_BYPASS = 4, + DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5, + DSCL_MODE_DSCL_BYPASS = 6 +}; + +static int dpp401_dscl_get_pixel_depth_val(enum lb_pixel_depth depth) +{ + if (depth == LB_PIXEL_DEPTH_30BPP) + return 0; /* 10 bpc */ + else if (depth == LB_PIXEL_DEPTH_24BPP) + return 1; /* 8 bpc */ + else if (depth == LB_PIXEL_DEPTH_18BPP) + return 2; /* 6 bpc */ + else if (depth == LB_PIXEL_DEPTH_36BPP) + return 3; /* 12 bpc */ + else { + ASSERT(0); + return -1; /* Unsupported */ + } +} + +static bool dpp401_dscl_is_video_format(enum pixel_format format) +{ + if (format >= PIXEL_FORMAT_VIDEO_BEGIN + && format <= PIXEL_FORMAT_VIDEO_END) + return true; + else + return false; +} + +static bool dpp401_dscl_is_420_format(enum pixel_format format) +{ + if (format == PIXEL_FORMAT_420BPP8 || + format == PIXEL_FORMAT_420BPP10) + return true; + else + return false; +} + +static enum dscl_mode_sel dpp401_dscl_get_dscl_mode( + struct dpp *dpp_base, + const struct scaler_data *data, + bool dbg_always_scale) +{ + const long long one = dc_fixpt_one.value; + + if (dpp_base->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) { + /* DSCL is processing data in fixed format */ + if (data->format == PIXEL_FORMAT_FP16) + return DSCL_MODE_DSCL_BYPASS; + } + + if (data->ratios.horz.value == one + && data->ratios.vert.value == one + && data->ratios.horz_c.value == one + && data->ratios.vert_c.value == one + && !dbg_always_scale) + return DSCL_MODE_SCALING_444_BYPASS; + + if (!dpp401_dscl_is_420_format(data->format)) { + if (dpp401_dscl_is_video_format(data->format)) + return DSCL_MODE_SCALING_444_YCBCR_ENABLE; + else + return DSCL_MODE_SCALING_444_RGB_ENABLE; + } + if (data->ratios.horz.value == one && data->ratios.vert.value == one) + return DSCL_MODE_SCALING_420_LUMA_BYPASS; + if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) + return DSCL_MODE_SCALING_420_CHROMA_BYPASS; + + return DSCL_MODE_SCALING_420_YCBCR_ENABLE; +} + +static void dpp401_power_on_dscl( + struct dpp *dpp_base, + bool power_on) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + if 
(dpp->tf_regs->DSCL_MEM_PWR_CTRL) { + if (power_on) { + REG_UPDATE(DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, 0); + REG_WAIT(DSCL_MEM_PWR_STATUS, LUT_MEM_PWR_STATE, 0, 1, 5); + } else { + if (dpp->base.ctx->dc->debug.enable_mem_low_power.bits.dscl) { + dpp->base.ctx->dc->optimized_required = true; + dpp->base.deferred_reg_writes.bits.disable_dscl = true; + } else { + REG_UPDATE(DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, 3); + } + } + } +} + + +static void dpp401_dscl_set_lb( + struct dcn401_dpp *dpp, + const struct line_buffer_params *lb_params, + enum lb_memory_config mem_size_config) +{ + uint32_t max_partitions = 63; /* Currently hardcoded on all ASICs before DCN 3.2 */ + + /* LB */ + if (dpp->base.caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) { + /* DSCL caps: pixel data processed in fixed format */ + uint32_t pixel_depth = dpp401_dscl_get_pixel_depth_val(lb_params->depth); + uint32_t dyn_pix_depth = lb_params->dynamic_pixel_depth; + + REG_SET_7(LB_DATA_FORMAT, 0, + PIXEL_DEPTH, pixel_depth, /* Pixel depth stored in LB */ + PIXEL_EXPAN_MODE, lb_params->pixel_expan_mode, /* Pixel expansion mode */ + PIXEL_REDUCE_MODE, 1, /* Pixel reduction mode: Rounding */ + DYNAMIC_PIXEL_DEPTH, dyn_pix_depth, /* Dynamic expansion pixel depth */ + DITHER_EN, 0, /* Dithering enable: Disabled */ + INTERLEAVE_EN, lb_params->interleave_en, /* Interleave source enable */ + LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */ + } else { + /* DSCL caps: pixel data processed in float format */ + REG_SET_2(LB_DATA_FORMAT, 0, + INTERLEAVE_EN, lb_params->interleave_en, /* Interleave source enable */ + LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */ + } + + if (dpp->base.caps->max_lb_partitions == 31) + max_partitions = 31; + + REG_SET_2(LB_MEMORY_CTRL, 0, + MEMORY_CONFIG, mem_size_config, + LB_MAX_PARTITIONS, max_partitions); +} + +static const uint16_t *dpp401_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) +{ + if (taps == 8) + return get_filter_8tap_64p(ratio); + else if (taps == 7) + return get_filter_7tap_64p(ratio); + else if (taps == 6) + return get_filter_6tap_64p(ratio); + else if (taps == 5) + return get_filter_5tap_64p(ratio); + else if (taps == 4) + return get_filter_4tap_64p(ratio); + else if (taps == 3) + return get_filter_3tap_64p(ratio); + else if (taps == 2) + return get_filter_2tap_64p(); + else if (taps == 1) + return NULL; + else { + /* should never happen, bug */ + BREAK_TO_DEBUGGER(); + return NULL; + } +} + +static void dpp401_dscl_set_scaler_filter( + struct dcn401_dpp *dpp, + uint32_t taps, + enum dcn401_coef_filter_type_sel filter_type, + const uint16_t *filter) +{ + const int tap_pairs = (taps + 1) / 2; + int phase; + int pair; + uint16_t odd_coef, even_coef; + + REG_SET_3(SCL_COEF_RAM_TAP_SELECT, 0, + SCL_COEF_RAM_TAP_PAIR_IDX, 0, + SCL_COEF_RAM_PHASE, 0, + SCL_COEF_RAM_FILTER_TYPE, filter_type); + + for (phase = 0; phase < (NUM_PHASES / 2 + 1); phase++) { + for (pair = 0; pair < tap_pairs; pair++) { + even_coef = filter[phase * taps + 2 * pair]; + if ((pair * 2 + 1) < taps) + odd_coef = filter[phase * taps + 2 * pair + 1]; + else + odd_coef = 0; + + REG_SET_4(SCL_COEF_RAM_TAP_DATA, 0, + /* Even tap coefficient (bits 1:0 fixed to 0) */ + SCL_COEF_RAM_EVEN_TAP_COEF, even_coef, + /* Write/read control for even coefficient */ + SCL_COEF_RAM_EVEN_TAP_COEF_EN, 1, + /* Odd tap coefficient (bits 1:0 fixed to 0) */ + SCL_COEF_RAM_ODD_TAP_COEF, odd_coef, + /* Write/read control for odd coefficient */ + SCL_COEF_RAM_ODD_TAP_COEF_EN, 1); + } 
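The loop above walks the scaler coefficient RAM one filter at a time: for each of the NUM_PHASES/2 + 1 phases the RAM stores, the taps are written two at a time as an even/odd pair, with the odd slot padded to zero when the tap count is odd. The following is a minimal stand-alone sketch of that same indexing, using printf in place of the driver's REG_SET_* macros; the filter table is a made-up placeholder and is only assumed to hold NUM_PHASES/2 + 1 phases of `taps` entries each.

#include <stdio.h>
#include <stdint.h>

#define NUM_PHASES 64

/* Mirror of the (phase, tap-pair) walk used when filling the coefficient RAM. */
static void walk_coef_ram(uint32_t taps, const uint16_t *filter)
{
	const int tap_pairs = (taps + 1) / 2;
	int phase, pair;
	uint16_t even_coef, odd_coef;

	for (phase = 0; phase < (NUM_PHASES / 2 + 1); phase++) {
		for (pair = 0; pair < tap_pairs; pair++) {
			even_coef = filter[phase * taps + 2 * pair];
			/* pad the odd slot when the tap count is odd */
			odd_coef = (2 * pair + 1) < (int)taps ?
				filter[phase * taps + 2 * pair + 1] : 0;
			printf("phase %2d pair %d: even 0x%04x odd 0x%04x\n",
			       phase, pair, even_coef, odd_coef);
		}
	}
}

int main(void)
{
	enum { TAPS = 4 };
	uint16_t dummy[(NUM_PHASES / 2 + 1) * TAPS];
	size_t i;

	/* placeholder coefficients, just to make the walk runnable */
	for (i = 0; i < sizeof(dummy) / sizeof(dummy[0]); i++)
		dummy[i] = (uint16_t)i;

	walk_coef_ram(TAPS, dummy);
	return 0;
}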
+ } + +} + +static void dpp401_dscl_set_scl_filter( + struct dcn401_dpp *dpp, + const struct scaler_data *scl_data, + bool chroma_coef_mode) +{ + bool h_2tap_hardcode_coef_en = false; + bool v_2tap_hardcode_coef_en = false; + bool h_2tap_sharp_en = false; + bool v_2tap_sharp_en = false; + uint32_t h_2tap_sharp_factor = scl_data->sharpness.horz; + uint32_t v_2tap_sharp_factor = scl_data->sharpness.vert; + bool coef_ram_current; + const uint16_t *filter_h = NULL; + const uint16_t *filter_v = NULL; + const uint16_t *filter_h_c = NULL; + const uint16_t *filter_v_c = NULL; + + if (dpp->base.ctx->dc->config.use_spl) { + filter_h = scl_data->dscl_prog_data.filter_h; + filter_v = scl_data->dscl_prog_data.filter_v; + filter_h_c = scl_data->dscl_prog_data.filter_h_c; + filter_v_c = scl_data->dscl_prog_data.filter_v_c; + } else { + filter_h = dpp401_dscl_get_filter_coeffs_64p( + scl_data->taps.h_taps, scl_data->ratios.horz); + filter_v = dpp401_dscl_get_filter_coeffs_64p( + scl_data->taps.v_taps, scl_data->ratios.vert); + filter_h_c = dpp401_dscl_get_filter_coeffs_64p( + scl_data->taps.h_taps_c, scl_data->ratios.horz_c); + filter_v_c = dpp401_dscl_get_filter_coeffs_64p( + scl_data->taps.v_taps_c, scl_data->ratios.vert_c); + } + + h_2tap_hardcode_coef_en = scl_data->taps.h_taps < 3 + && scl_data->taps.h_taps_c < 3 + && (scl_data->taps.h_taps > 1 && scl_data->taps.h_taps_c > 1); + v_2tap_hardcode_coef_en = scl_data->taps.v_taps < 3 + && scl_data->taps.v_taps_c < 3 + && (scl_data->taps.v_taps > 1 && scl_data->taps.v_taps_c > 1); + + h_2tap_sharp_en = h_2tap_hardcode_coef_en && h_2tap_sharp_factor != 0; + v_2tap_sharp_en = v_2tap_hardcode_coef_en && v_2tap_sharp_factor != 0; + + REG_UPDATE_6(DSCL_2TAP_CONTROL, + SCL_H_2TAP_HARDCODE_COEF_EN, h_2tap_hardcode_coef_en, + SCL_H_2TAP_SHARP_EN, h_2tap_sharp_en, + SCL_H_2TAP_SHARP_FACTOR, h_2tap_sharp_factor, + SCL_V_2TAP_HARDCODE_COEF_EN, v_2tap_hardcode_coef_en, + SCL_V_2TAP_SHARP_EN, v_2tap_sharp_en, + SCL_V_2TAP_SHARP_FACTOR, v_2tap_sharp_factor); + + if (!v_2tap_hardcode_coef_en || !h_2tap_hardcode_coef_en) { + bool filter_updated = false; + + filter_updated = (filter_h && (filter_h != dpp->filter_h)) + || (filter_v && (filter_v != dpp->filter_v)); + + if (chroma_coef_mode) { + filter_updated = filter_updated || (filter_h_c && (filter_h_c != dpp->filter_h_c)) + || (filter_v_c && (filter_v_c != dpp->filter_v_c)); + } + + if (filter_updated) { + uint32_t scl_mode = REG_READ(SCL_MODE); + + if (!h_2tap_hardcode_coef_en && filter_h) { + dpp401_dscl_set_scaler_filter( + dpp, scl_data->taps.h_taps, + SCL_COEF_LUMA_HORZ_FILTER, filter_h); + } + dpp->filter_h = filter_h; + if (!v_2tap_hardcode_coef_en && filter_v) { + dpp401_dscl_set_scaler_filter( + dpp, scl_data->taps.v_taps, + SCL_COEF_LUMA_VERT_FILTER, filter_v); + } + dpp->filter_v = filter_v; + if (chroma_coef_mode) { + if (!h_2tap_hardcode_coef_en && filter_h_c) { + dpp401_dscl_set_scaler_filter( + dpp, scl_data->taps.h_taps_c, + SCL_COEF_CHROMA_HORZ_FILTER, filter_h_c); + } + if (!v_2tap_hardcode_coef_en && filter_v_c) { + dpp401_dscl_set_scaler_filter( + dpp, scl_data->taps.v_taps_c, + SCL_COEF_CHROMA_VERT_FILTER, filter_v_c); + } + } + dpp->filter_h_c = filter_h_c; + dpp->filter_v_c = filter_v_c; + + coef_ram_current = get_reg_field_value_ex( + scl_mode, dpp->tf_mask->SCL_COEF_RAM_SELECT_CURRENT, + dpp->tf_shift->SCL_COEF_RAM_SELECT_CURRENT); + + /* Swap coefficient RAM and set chroma coefficient mode */ + REG_SET_2(SCL_MODE, scl_mode, + SCL_COEF_RAM_SELECT, !coef_ram_current, + SCL_CHROMA_COEF_MODE, 
chroma_coef_mode); + } + } +} + +// TODO: Fix defined but not used error +//static int dpp401_dscl_get_lb_depth_bpc(enum lb_pixel_depth depth) +//{ +// if (depth == LB_PIXEL_DEPTH_30BPP) +// return 10; +// else if (depth == LB_PIXEL_DEPTH_24BPP) +// return 8; +// else if (depth == LB_PIXEL_DEPTH_18BPP) +// return 6; +// else if (depth == LB_PIXEL_DEPTH_36BPP) +// return 12; +// else { +// BREAK_TO_DEBUGGER(); +// return -1; /* Unsupported */ +// } +//} + +// TODO: Fix defined but not used error +//void dpp401_dscl_calc_lb_num_partitions( +// const struct scaler_data *scl_data, +// enum lb_memory_config lb_config, +// int *num_part_y, +// int *num_part_c) +//{ +// int lb_memory_size, lb_memory_size_c, lb_memory_size_a, num_partitions_a, +// lb_bpc, memory_line_size_y, memory_line_size_c, memory_line_size_a; +// +// int line_size = scl_data->viewport.width < scl_data->recout.width ? +// scl_data->viewport.width : scl_data->recout.width; +// int line_size_c = scl_data->viewport_c.width < scl_data->recout.width ? +// scl_data->viewport_c.width : scl_data->recout.width; +// +// if (line_size == 0) +// line_size = 1; +// +// if (line_size_c == 0) +// line_size_c = 1; +// +// +// lb_bpc = dpp401_dscl_get_lb_depth_bpc(scl_data->lb_params.depth); +// memory_line_size_y = (line_size * lb_bpc + 71) / 72; /* +71 to ceil */ +// memory_line_size_c = (line_size_c * lb_bpc + 71) / 72; /* +71 to ceil */ +// memory_line_size_a = (line_size + 5) / 6; /* +5 to ceil */ +// +// if (lb_config == LB_MEMORY_CONFIG_1) { +// lb_memory_size = 816; +// lb_memory_size_c = 816; +// lb_memory_size_a = 984; +// } else if (lb_config == LB_MEMORY_CONFIG_2) { +// lb_memory_size = 1088; +// lb_memory_size_c = 1088; +// lb_memory_size_a = 1312; +// } else if (lb_config == LB_MEMORY_CONFIG_3) { +// /* 420 mode: using 3rd mem from Y, Cr and Cb */ +// lb_memory_size = 816 + 1088 + 848 + 848 + 848; +// lb_memory_size_c = 816 + 1088; +// lb_memory_size_a = 984 + 1312 + 456; +// } else { +// lb_memory_size = 816 + 1088 + 848; +// lb_memory_size_c = 816 + 1088 + 848; +// lb_memory_size_a = 984 + 1312 + 456; +// } +// *num_part_y = lb_memory_size / memory_line_size_y; +// *num_part_c = lb_memory_size_c / memory_line_size_c; +// num_partitions_a = lb_memory_size_a / memory_line_size_a; +// +// if (scl_data->lb_params.alpha_en +// && (num_partitions_a < *num_part_y)) +// *num_part_y = num_partitions_a; +// +// if (*num_part_y > 64) +// *num_part_y = 64; +// if (*num_part_c > 64) +// *num_part_c = 64; +// +//} + +static bool dpp401_dscl_is_lb_conf_valid(int ceil_vratio, int num_partitions, int vtaps) +{ + if (ceil_vratio > 2) + return vtaps <= (num_partitions - ceil_vratio + 2); + else + return vtaps <= num_partitions; +} + +/*find first match configuration which meets the min required lb size*/ +static enum lb_memory_config dpp401_dscl_find_lb_memory_config(struct dcn401_dpp *dpp, + const struct scaler_data *scl_data) +{ + int num_part_y, num_part_c; + int vtaps = scl_data->taps.v_taps; + int vtaps_c = scl_data->taps.v_taps_c; + int ceil_vratio = dc_fixpt_ceil(scl_data->ratios.vert); + int ceil_vratio_c = dc_fixpt_ceil(scl_data->ratios.vert_c); + + if (dpp->base.ctx->dc->debug.use_max_lb) { + if (scl_data->format == PIXEL_FORMAT_420BPP8 + || scl_data->format == PIXEL_FORMAT_420BPP10) + return LB_MEMORY_CONFIG_3; + return LB_MEMORY_CONFIG_0; + } + + dpp->base.caps->dscl_calc_lb_num_partitions( + scl_data, LB_MEMORY_CONFIG_1, &num_part_y, &num_part_c); + + if (dpp401_dscl_is_lb_conf_valid(ceil_vratio, num_part_y, vtaps) + && 
dpp401_dscl_is_lb_conf_valid(ceil_vratio_c, num_part_c, vtaps_c)) + return LB_MEMORY_CONFIG_1; + + dpp->base.caps->dscl_calc_lb_num_partitions( + scl_data, LB_MEMORY_CONFIG_2, &num_part_y, &num_part_c); + + if (dpp401_dscl_is_lb_conf_valid(ceil_vratio, num_part_y, vtaps) + && dpp401_dscl_is_lb_conf_valid(ceil_vratio_c, num_part_c, vtaps_c)) + return LB_MEMORY_CONFIG_2; + + if (scl_data->format == PIXEL_FORMAT_420BPP8 + || scl_data->format == PIXEL_FORMAT_420BPP10) { + dpp->base.caps->dscl_calc_lb_num_partitions( + scl_data, LB_MEMORY_CONFIG_3, &num_part_y, &num_part_c); + + if (dpp401_dscl_is_lb_conf_valid(ceil_vratio, num_part_y, vtaps) + && dpp401_dscl_is_lb_conf_valid(ceil_vratio_c, num_part_c, vtaps_c)) + return LB_MEMORY_CONFIG_3; + } + + dpp->base.caps->dscl_calc_lb_num_partitions( + scl_data, LB_MEMORY_CONFIG_0, &num_part_y, &num_part_c); + + /*Ensure we can support the requested number of vtaps*/ + ASSERT(dpp401_dscl_is_lb_conf_valid(ceil_vratio, num_part_y, vtaps) + && dpp401_dscl_is_lb_conf_valid(ceil_vratio_c, num_part_c, vtaps_c)); + + return LB_MEMORY_CONFIG_0; +} + + +static void dpp401_dscl_set_manual_ratio_init( + struct dcn401_dpp *dpp, const struct scaler_data *data) +{ + uint32_t init_frac = 0; + uint32_t init_int = 0; + if (dpp->base.ctx->dc->config.use_spl) { + REG_SET(SCL_HORZ_FILTER_SCALE_RATIO, 0, + SCL_H_SCALE_RATIO, data->dscl_prog_data.ratios.h_scale_ratio); + + REG_SET(SCL_VERT_FILTER_SCALE_RATIO, 0, + SCL_V_SCALE_RATIO, data->dscl_prog_data.ratios.v_scale_ratio); + + REG_SET(SCL_HORZ_FILTER_SCALE_RATIO_C, 0, + SCL_H_SCALE_RATIO_C, data->dscl_prog_data.ratios.h_scale_ratio_c); + + REG_SET(SCL_VERT_FILTER_SCALE_RATIO_C, 0, + SCL_V_SCALE_RATIO_C, data->dscl_prog_data.ratios.v_scale_ratio_c); + + REG_SET_2(SCL_HORZ_FILTER_INIT, 0, + SCL_H_INIT_FRAC, data->dscl_prog_data.init.h_filter_init_frac, + SCL_H_INIT_INT, data->dscl_prog_data.init.h_filter_init_int); + + REG_SET_2(SCL_HORZ_FILTER_INIT_C, 0, + SCL_H_INIT_FRAC_C, data->dscl_prog_data.init.h_filter_init_frac_c, + SCL_H_INIT_INT_C, data->dscl_prog_data.init.h_filter_init_int_c); + + REG_SET_2(SCL_VERT_FILTER_INIT, 0, + SCL_V_INIT_FRAC, data->dscl_prog_data.init.v_filter_init_frac, + SCL_V_INIT_INT, data->dscl_prog_data.init.v_filter_init_int); + + if (REG(SCL_VERT_FILTER_INIT_BOT)) { + REG_SET_2(SCL_VERT_FILTER_INIT_BOT, 0, + SCL_V_INIT_FRAC_BOT, data->dscl_prog_data.init.v_filter_init_bot_frac, + SCL_V_INIT_INT_BOT, data->dscl_prog_data.init.v_filter_init_bot_int); + } + + REG_SET_2(SCL_VERT_FILTER_INIT_C, 0, + SCL_V_INIT_FRAC_C, data->dscl_prog_data.init.v_filter_init_frac_c, + SCL_V_INIT_INT_C, data->dscl_prog_data.init.v_filter_init_int_c); + + if (REG(SCL_VERT_FILTER_INIT_BOT_C)) { + REG_SET_2(SCL_VERT_FILTER_INIT_BOT_C, 0, + SCL_V_INIT_FRAC_BOT_C, data->dscl_prog_data.init.v_filter_init_bot_frac_c, + SCL_V_INIT_INT_BOT_C, data->dscl_prog_data.init.v_filter_init_bot_int_c); + } + return; + } + REG_SET(SCL_HORZ_FILTER_SCALE_RATIO, 0, + SCL_H_SCALE_RATIO, dc_fixpt_u3d19(data->ratios.horz) << 5); + + REG_SET(SCL_VERT_FILTER_SCALE_RATIO, 0, + SCL_V_SCALE_RATIO, dc_fixpt_u3d19(data->ratios.vert) << 5); + + REG_SET(SCL_HORZ_FILTER_SCALE_RATIO_C, 0, + SCL_H_SCALE_RATIO_C, dc_fixpt_u3d19(data->ratios.horz_c) << 5); + + REG_SET(SCL_VERT_FILTER_SCALE_RATIO_C, 0, + SCL_V_SCALE_RATIO_C, dc_fixpt_u3d19(data->ratios.vert_c) << 5); + + /* + * 0.24 format for fraction, first five bits zeroed + */ + init_frac = dc_fixpt_u0d19(data->inits.h) << 5; + init_int = dc_fixpt_floor(data->inits.h); + REG_SET_2(SCL_HORZ_FILTER_INIT, 
0, + SCL_H_INIT_FRAC, init_frac, + SCL_H_INIT_INT, init_int); + + init_frac = dc_fixpt_u0d19(data->inits.h_c) << 5; + init_int = dc_fixpt_floor(data->inits.h_c); + REG_SET_2(SCL_HORZ_FILTER_INIT_C, 0, + SCL_H_INIT_FRAC_C, init_frac, + SCL_H_INIT_INT_C, init_int); + + init_frac = dc_fixpt_u0d19(data->inits.v) << 5; + init_int = dc_fixpt_floor(data->inits.v); + REG_SET_2(SCL_VERT_FILTER_INIT, 0, + SCL_V_INIT_FRAC, init_frac, + SCL_V_INIT_INT, init_int); + + if (REG(SCL_VERT_FILTER_INIT_BOT)) { + struct fixed31_32 bot = dc_fixpt_add(data->inits.v, data->ratios.vert); + + init_frac = dc_fixpt_u0d19(bot) << 5; + init_int = dc_fixpt_floor(bot); + REG_SET_2(SCL_VERT_FILTER_INIT_BOT, 0, + SCL_V_INIT_FRAC_BOT, init_frac, + SCL_V_INIT_INT_BOT, init_int); + } + + init_frac = dc_fixpt_u0d19(data->inits.v_c) << 5; + init_int = dc_fixpt_floor(data->inits.v_c); + REG_SET_2(SCL_VERT_FILTER_INIT_C, 0, + SCL_V_INIT_FRAC_C, init_frac, + SCL_V_INIT_INT_C, init_int); + + if (REG(SCL_VERT_FILTER_INIT_BOT_C)) { + struct fixed31_32 bot = dc_fixpt_add(data->inits.v_c, data->ratios.vert_c); + + init_frac = dc_fixpt_u0d19(bot) << 5; + init_int = dc_fixpt_floor(bot); + REG_SET_2(SCL_VERT_FILTER_INIT_BOT_C, 0, + SCL_V_INIT_FRAC_BOT_C, init_frac, + SCL_V_INIT_INT_BOT_C, init_int); + } +} + +/** + * dpp401_dscl_set_recout - Set the first pixel of RECOUT in the OTG active area + * + * @dpp: DPP data struct + * @recout: Rectangle information + * + * This function sets the MPC RECOUT_START and RECOUT_SIZE registers based on + * the values specified in the recount parameter. + * + * Note: This function only have effect if AutoCal is disabled. + */ +static void dpp401_dscl_set_recout(struct dcn401_dpp *dpp, + const struct rect *recout) +{ + REG_SET_2(RECOUT_START, 0, + /* First pixel of RECOUT in the active OTG area */ + RECOUT_START_X, recout->x, + /* First line of RECOUT in the active OTG area */ + RECOUT_START_Y, recout->y); + + REG_SET_2(RECOUT_SIZE, 0, + /* Number of RECOUT horizontal pixels */ + RECOUT_WIDTH, recout->width, + /* Number of RECOUT vertical lines */ + RECOUT_HEIGHT, recout->height); +} +/** + * dpp401_dscl_program_easf - Program EASF + * + * @dpp_base: High level DPP struct + * @scl_data: scalaer_data info + * + * This is the primary function to program EASF + * + */ +static void dpp401_dscl_program_easf(struct dpp *dpp_base, const struct scaler_data *scl_data) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + PERF_TRACE(); + REG_UPDATE(DSCL_SC_MODE, + SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode); + REG_UPDATE(DSCL_SC_MODE, + SCL_SC_LTONL_EN, scl_data->dscl_prog_data.easf_ltonl_en); + // DSCL_EASF_V_MODE + REG_UPDATE(DSCL_EASF_V_MODE, + SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en); + REG_UPDATE(DSCL_EASF_V_MODE, + SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor); + REG_UPDATE(DSCL_EASF_V_MODE, + SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain); + REG_UPDATE(DSCL_EASF_V_BF_CNTL, + SCL_EASF_V_BF2_ROC_GAIN, 
scl_data->dscl_prog_data.easf_v_bf2_roc_gain); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, + SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, + SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, + SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, + SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, + SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope); + REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, + SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset); + REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, + SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1); + REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, + SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2); + REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, + SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1); + REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, + SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2); + REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, + SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa); + REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, + SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb); + REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, + SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina); + REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, + SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb); + // DSCL_EASF_H_MODE + REG_UPDATE(DSCL_EASF_H_MODE, + SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en); + REG_UPDATE(DSCL_EASF_H_MODE, + SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor); + REG_UPDATE(DSCL_EASF_H_MODE, + SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain); + REG_UPDATE(DSCL_EASF_H_BF_CNTL, + SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain); + REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, + SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1); + REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, + SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2); + REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, + SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1); + REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, + SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2); + REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, + SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa); + 
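The long run of REG_UPDATE() calls in dpp401_dscl_program_easf leans on the usual DC register helper pattern: each named field carries a per-register shift and mask, and the macro performs a read-modify-write of just that field, leaving the rest of the register untouched. Below is a simplified, hedged model of that pattern; it is not the actual reg_helper.h plumbing, and the field name, shift, and mask are made up for illustration.

#include <stdio.h>
#include <stdint.h>

struct field_desc {
	uint32_t shift;
	uint32_t mask;	/* mask already positioned at 'shift' */
};

/* Read-modify-write of a single field within a 32-bit register value. */
static uint32_t reg_update_field(uint32_t reg_val,
				 const struct field_desc *f,
				 uint32_t field_val)
{
	reg_val &= ~f->mask;				/* clear the old field bits */
	reg_val |= (field_val << f->shift) & f->mask;	/* insert the new value     */
	return reg_val;
}

int main(void)
{
	/* hypothetical 4-bit mode field at bit 8 */
	const struct field_desc mode = { .shift = 8, .mask = 0xf00 };

	/* 0xffffffff with the field replaced by 0x3 -> 0xfffff3ff */
	printf("0x%08x\n", reg_update_field(0xffffffff, &mode, 0x3));
	return 0;
}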
REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, + SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb); + REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, + SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina); + REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, + SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb); + PERF_TRACE(); +} +static void dpp401_dscl_set_isharp_filter( + struct dcn401_dpp *dpp, const uint32_t *filter) +{ + int level; + uint32_t filter_data; + REG_UPDATE(ISHARP_DELTA_CTRL, + ISHARP_DELTA_LUT_HOST_SELECT, 0); + for (level = 0; level < NUM_LEVELS; level++) { + filter_data = filter[level]; + REG_SET(ISHARP_DELTA_INDEX, 0, + ISHARP_DELTA_INDEX, level); + REG_SET(ISHARP_DELTA_DATA, 0, + ISHARP_DELTA_DATA, filter_data); + } +} // dpp401_dscl_set_isharp_filter +/** + * dpp401_dscl_program_isharp - Program isharp + * + * @dpp_base: High level DPP struct + * @scl_data: scalaer_data info + * + * This is the primary function to program isharp + * + */ +static void dpp401_dscl_program_isharp(struct dpp *dpp_base, + const struct scaler_data *scl_data) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0) + return; + + PERF_TRACE(); + dpp->scl_data = *scl_data; + // ISHARP_EN + REG_SET(ISHARP_MODE, 0, + ISHARP_EN, scl_data->dscl_prog_data.isharp_en); + // ISHARP_NOISEDET_EN + REG_SET(ISHARP_MODE, 0, + ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable); + // ISHARP_NOISEDET_MODE + REG_SET(ISHARP_MODE, 0, + ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); + // ISHARP_NOISEDET_UTHRE + REG_SET(ISHARP_NOISEDET_THRESHOLD, 0, + ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); + // ISHARP_NOISEDET_DTHRE + REG_SET(ISHARP_NOISEDET_THRESHOLD, 0, + ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold); + REG_SET(ISHARP_MODE, 0, + ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); + // ISHARP_NOISEDET_UTHRE + REG_SET(ISHARP_NOISEDET_THRESHOLD, 0, + ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); + // ISHARP_NOISEDET_DTHRE + REG_SET(ISHARP_NOISEDET_THRESHOLD, 0, + ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold); + // ISHARP_NOISEDET_PWL_START_IN + REG_SET(ISHARP_NOISE_GAIN_PWL, 0, + ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in); + // ISHARP_NOISEDET_PWL_END_IN + REG_SET(ISHARP_NOISE_GAIN_PWL, 0, + ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in); + // ISHARP_NOISEDET_PWL_SLOPE + REG_SET(ISHARP_NOISE_GAIN_PWL, 0, + ISHARP_NOISEDET_PWL_SLOPE, scl_data->dscl_prog_data.isharp_noise_det.pwl_slope); + // ISHARP_LBA_MODE + REG_SET(ISHARP_MODE, 0, + ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode); + // TODO: ISHARP_LBA: IN_SEG, BASE_SEG, SLOPE_SEG + // ISHARP_FMT_MODE + REG_SET(ISHARP_MODE, 0, + ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode); + // ISHARP_FMT_NORM + REG_SET(ISHARP_MODE, 0, + ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm); + // ISHARP_DELTA_LUT + dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta); + // ISHARP_NLDELTA_SCLIP_EN_P + REG_SET(ISHARP_NLDELTA_SOFT_CLIP, 0, + ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p); + // ISHARP_NLDELTA_SCLIP_PIVOT_P + REG_SET(ISHARP_NLDELTA_SOFT_CLIP, 0, + ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p); + // 
ISHARP_NLDELTA_SCLIP_SLOPE_P + REG_SET(ISHARP_NLDELTA_SOFT_CLIP, 0, + ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p); + // ISHARP_NLDELTA_SCLIP_EN_N + REG_SET(ISHARP_NLDELTA_SOFT_CLIP, 0, + ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n); + // ISHARP_NLDELTA_SCLIP_PIVOT_N + REG_SET(ISHARP_NLDELTA_SOFT_CLIP, 0, + ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n); + // ISHARP_NLDELTA_SCLIP_SLOPE_N + REG_SET(ISHARP_NLDELTA_SOFT_CLIP, 0, + ISHARP_NLDELTA_SCLIP_SLOPE_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_n); + PERF_TRACE(); +} // dpp401_dscl_program_isharp +/** + * dpp401_dscl_set_scaler_manual_scale - Manually program scaler and line buffer + * + * @dpp_base: High level DPP struct + * @scl_data: scalaer_data info + * + * This is the primary function to program scaler and line buffer in manual + * scaling mode. To execute the required operations for manual scale, we need + * to disable AutoCal first. + */ +void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, + const struct scaler_data *scl_data) +{ + enum lb_memory_config lb_config; + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + const struct rect *rect = &scl_data->recout; + uint32_t mpc_width = scl_data->h_active; + uint32_t mpc_height = scl_data->v_active; + uint32_t v_num_taps = scl_data->taps.v_taps - 1; + uint32_t v_num_taps_c = scl_data->taps.v_taps_c - 1; + uint32_t h_num_taps = scl_data->taps.h_taps - 1; + uint32_t h_num_taps_c = scl_data->taps.h_taps_c - 1; + enum dscl_mode_sel dscl_mode = dpp401_dscl_get_dscl_mode( + dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale); + bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN + && scl_data->format <= PIXEL_FORMAT_VIDEO_END; + + if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0) + return; + + PERF_TRACE(); + + dpp->scl_data = *scl_data; + + if (dpp->base.ctx->dc->config.use_spl) { + dscl_mode = (enum dscl_mode_sel) scl_data->dscl_prog_data.dscl_mode; + rect = (struct rect *)&scl_data->dscl_prog_data.recout; + mpc_width = scl_data->dscl_prog_data.mpc_size.width; + mpc_height = scl_data->dscl_prog_data.mpc_size.height; + v_num_taps = scl_data->dscl_prog_data.taps.v_taps; + v_num_taps_c = scl_data->dscl_prog_data.taps.v_taps_c; + h_num_taps = scl_data->dscl_prog_data.taps.h_taps; + h_num_taps_c = scl_data->dscl_prog_data.taps.h_taps_c; + } + if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.dscl) { + if (dscl_mode != DSCL_MODE_DSCL_BYPASS) + dpp401_power_on_dscl(dpp_base, true); + } + + /* Autocal off */ + REG_SET_3(DSCL_AUTOCAL, 0, + AUTOCAL_MODE, AUTOCAL_MODE_OFF, + AUTOCAL_NUM_PIPE, 0, + AUTOCAL_PIPE_ID, 0); + + /*clean scaler boundary mode when Autocal off*/ + REG_SET(DSCL_CONTROL, 0, + SCL_BOUNDARY_MODE, 0); + + /* Recout */ + dpp401_dscl_set_recout(dpp, rect); + + /* MPC Size */ + REG_SET_2(MPC_SIZE, 0, + /* Number of horizontal pixels of MPC */ + MPC_WIDTH, mpc_width, + /* Number of vertical lines of MPC */ + MPC_HEIGHT, mpc_height); + + /* SCL mode */ + REG_UPDATE(SCL_MODE, DSCL_MODE, dscl_mode); + + if (dscl_mode == DSCL_MODE_DSCL_BYPASS) { + if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.dscl) + dpp401_power_on_dscl(dpp_base, false); + return; + } + + /* LB */ + lb_config = dpp401_dscl_find_lb_memory_config(dpp, scl_data); + dpp401_dscl_set_lb(dpp, &scl_data->lb_params, lb_config); + + if (dscl_mode == DSCL_MODE_SCALING_444_BYPASS) + return; + + /* Black offsets */ + if (REG(SCL_BLACK_OFFSET)) { + if 
(ycbcr) + REG_SET_2(SCL_BLACK_OFFSET, 0, + SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y, + SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_CBCR); + else + + REG_SET_2(SCL_BLACK_OFFSET, 0, + SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y, + SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_RGB_Y); + } + + /* Manually calculate scale ratio and init values */ + dpp401_dscl_set_manual_ratio_init(dpp, scl_data); + + /* HTaps/VTaps */ + REG_SET_4(SCL_TAP_CONTROL, 0, + SCL_V_NUM_TAPS, v_num_taps, + SCL_H_NUM_TAPS, h_num_taps, + SCL_V_NUM_TAPS_C, v_num_taps_c, + SCL_H_NUM_TAPS_C, h_num_taps_c); + + dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr); + /* Edge adaptive scaler function configuration */ + if (dpp->base.ctx->dc->config.prefer_easf) + dpp401_dscl_program_easf(dpp_base, scl_data); + /* isharp configuration */ + //if (dpp->base.ctx->dc->config.prefer_easf) + dpp401_dscl_program_isharp(dpp_base, scl_data); + PERF_TRACE(); +} diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c new file mode 100644 index 000000000000..b90710726840 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c @@ -0,0 +1,747 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include <drm/display/drm_dsc_helper.h> + +#include "reg_helper.h" +#include "dcn401_dsc.h" +#include "dsc/dscc_types.h" +#include "dsc/rc_calc.h" + +static void dsc_write_to_registers(struct display_stream_compressor *dsc, const struct dsc_reg_values *reg_vals); + +/* Object I/F functions */ +//static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); +static void dsc401_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s); +static bool dsc401_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg); +static void dsc401_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, + struct dsc_optc_config *dsc_optc_cfg); +//static bool dsc401_get_packed_pps(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, uint8_t *dsc_packed_pps); +static void dsc401_enable(struct display_stream_compressor *dsc, int opp_pipe); +static void dsc401_disable(struct display_stream_compressor *dsc); +static void dsc401_disconnect(struct display_stream_compressor *dsc); + +const struct dsc_funcs dcn401_dsc_funcs = { + .dsc_get_enc_caps = dsc2_get_enc_caps, + .dsc_read_state = dsc401_read_state, + .dsc_validate_stream = dsc401_validate_stream, + .dsc_set_config = dsc401_set_config, + .dsc_get_packed_pps = dsc2_get_packed_pps, + .dsc_enable = dsc401_enable, + .dsc_disable = dsc401_disable, + .dsc_disconnect = dsc401_disconnect, +}; + +/* Macro definitios for REG_SET macros*/ +#define CTX \ + dsc401->base.ctx + +#define REG(reg)\ + dsc401->dsc_regs->reg + +#undef FN +#define FN(reg_name, field_name) \ + dsc401->dsc_shift->field_name, dsc401->dsc_mask->field_name +#define DC_LOGGER \ + dsc->ctx->logger + +#define DCN401_MAX_PIXEL_CLOCK_Mhz 1188 +#define DCN401_MAX_DISPLAY_CLOCK_Mhz 1200 + +enum dsc_bits_per_comp { + DSC_BPC_8 = 8, + DSC_BPC_10 = 10, + DSC_BPC_12 = 12, + DSC_BPC_UNKNOWN +}; + +/* API functions (external or via structure->function_pointer) */ + +void dsc401_construct(struct dcn401_dsc *dsc, + struct dc_context *ctx, + int inst, + const struct dcn401_dsc_registers *dsc_regs, + const struct dcn401_dsc_shift *dsc_shift, + const struct dcn401_dsc_mask *dsc_mask) +{ + dsc->base.ctx = ctx; + dsc->base.inst = inst; + dsc->base.funcs = 
&dcn401_dsc_funcs; + + dsc->dsc_regs = dsc_regs; + dsc->dsc_shift = dsc_shift; + dsc->dsc_mask = dsc_mask; + + dsc->max_image_width = 5184; +} + +/* This returns the capabilities for a single DSC encoder engine. Number of slices and total throughput + * can be doubled, tripled etc. by using additional DSC engines. + */ +//static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) +//{ +// dsc_enc_caps->dsc_version = 0x21; /* v1.2 - DP spec defined it in reverse order and we kept it */ +// +// /*dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = 1; +// dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 = 1; +// dsc_enc_caps->slice_caps.bits.NUM_SLICES_3 = 1; +// dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 = 1; +// +// dsc_enc_caps->lb_bit_depth = 13; +// dsc_enc_caps->is_block_pred_supported = true; +// +// dsc_enc_caps->color_formats.bits.RGB = 1; +// dsc_enc_caps->color_formats.bits.YCBCR_444 = 1; +// dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 1; +// dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 0; +// dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_420 = 1; +// +// dsc_enc_caps->color_depth.bits.COLOR_DEPTH_8_BPC = 1; +// dsc_enc_caps->color_depth.bits.COLOR_DEPTH_10_BPC = 1; +// dsc_enc_caps->color_depth.bits.COLOR_DEPTH_12_BPC = 1; +// +// /* Maximum total throughput with all the slices combined. This is different from how DP spec specifies it. +// * Our decoder's total throughput in Pix/s is equal to DISPCLK. This is then shared between slices. +// * The value below is the absolute maximum value. The actual throughput may be lower, but it'll always +// * be sufficient to process the input pixel rate fed into a single DSC engine. +// */ +// /*dsc_enc_caps->max_total_throughput_mps = DCN401_MAX_DISPLAY_CLOCK_Mhz; +// +// /* For pixel clock bigger than a single-pipe limit we'll need two engines, which then doubles our +// * throughput and number of slices, but also introduces a lower limit of 2 slices +// */ +// /*if (pixel_clock_100Hz >= DCN401_MAX_PIXEL_CLOCK_Mhz*10000) { +// dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = 0; +// dsc_enc_caps->slice_caps.bits.NUM_SLICES_8 = 1; +// dsc_enc_caps->max_total_throughput_mps = DCN401_MAX_DISPLAY_CLOCK_Mhz * 2; +// } +// +// dsc_enc_caps->max_slice_width = 5184; /* (including 64 overlap pixels for eDP MSO mode) */ +// /*dsc_enc_caps->bpp_increment_div = 16; /* 1/16th of a bit */ +//} + +/* this function read dsc related register fields to be logged later in dcn10_log_hw_state + * into a dcn_dsc_state struct. 
+ */ +static void dsc401_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s) +{ + struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc); + + REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &s->dsc_clock_en); + REG_GET(DSCC_PPS_CONFIG3, SLICE_WIDTH, &s->dsc_slice_width); + REG_GET(DSCC_PPS_CONFIG1, BITS_PER_PIXEL, &s->dsc_bits_per_pixel); + REG_GET(DSCC_PPS_CONFIG3, SLICE_HEIGHT, &s->dsc_slice_height); + REG_GET(DSCC_PPS_CONFIG1, CHUNK_SIZE, &s->dsc_chunk_size); + REG_GET(DSCC_PPS_CONFIG2, PIC_WIDTH, &s->dsc_pic_width); + REG_GET(DSCC_PPS_CONFIG2, PIC_HEIGHT, &s->dsc_pic_height); + REG_GET(DSCC_PPS_CONFIG7, SLICE_BPG_OFFSET, &s->dsc_slice_bpg_offset); + REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &s->dsc_fw_en, + DSCRM_DSC_OPP_PIPE_SOURCE, &s->dsc_opp_source); +} + + +static bool dsc401_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg) +{ + struct dsc_optc_config dsc_optc_cfg; + struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc); + + if (dsc_cfg->pic_width > dsc401->max_image_width) + return false; + + return dsc_prepare_config(dsc_cfg, &dsc401->reg_vals, &dsc_optc_cfg); +} + +/* +static void dsc_config_log(struct display_stream_compressor *dsc, const struct dsc_config *config) +{ + DC_LOG_DSC("\tnum_slices_h %d", config->dc_dsc_cfg.num_slices_h); + DC_LOG_DSC("\tnum_slices_v %d", config->dc_dsc_cfg.num_slices_v); + DC_LOG_DSC("\tbits_per_pixel %d (%d.%04d)", + config->dc_dsc_cfg.bits_per_pixel, + config->dc_dsc_cfg.bits_per_pixel / 16, + ((config->dc_dsc_cfg.bits_per_pixel % 16) * 10000) / 16); + DC_LOG_DSC("\tcolor_depth %d", config->color_depth); +} +*/ + +static void dsc401_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, + struct dsc_optc_config *dsc_optc_cfg) +{ + bool is_config_ok; + struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc); + + DC_LOG_DSC("Setting DSC Config at DSC inst %d", dsc->inst); + dsc_config_log(dsc, dsc_cfg); + is_config_ok = dsc_prepare_config(dsc_cfg, &dsc401->reg_vals, dsc_optc_cfg); + ASSERT(is_config_ok); + DC_LOG_DSC("programming DSC Picture Parameter Set (PPS):"); + dsc_log_pps(dsc, &dsc401->reg_vals.pps); + dsc_write_to_registers(dsc, &dsc401->reg_vals); +} + +/* +static bool dsc401_get_packed_pps(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, uint8_t *dsc_packed_pps) +{ + bool is_config_ok; + struct dsc_reg_values dsc_reg_vals; + struct dsc_optc_config dsc_optc_cfg; + + memset(&dsc_reg_vals, 0, sizeof(dsc_reg_vals)); + memset(&dsc_optc_cfg, 0, sizeof(dsc_optc_cfg)); + + DC_LOG_DSC("Getting packed DSC PPS for DSC Config:"); + dsc_config_log(dsc, dsc_cfg); + DC_LOG_DSC("DSC Picture Parameter Set (PPS):"); + is_config_ok = dsc_prepare_config(dsc_cfg, &dsc_reg_vals, &dsc_optc_cfg); + ASSERT(is_config_ok); + drm_dsc_pps_payload_pack((struct drm_dsc_picture_parameter_set *)dsc_packed_pps, &dsc_reg_vals.pps); + dsc_log_pps(dsc, &dsc_reg_vals.pps); + + return is_config_ok; +} +*/ + +static void dsc401_enable(struct display_stream_compressor *dsc, int opp_pipe) +{ + struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc); + int dsc_clock_en; + int dsc_fw_config; + int enabled_opp_pipe; + + DC_LOG_DSC("enable DSC %d at opp pipe %d", dsc->inst, opp_pipe); + + REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en); + REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &dsc_fw_config, DSCRM_DSC_OPP_PIPE_SOURCE, &enabled_opp_pipe); + if ((dsc_clock_en || dsc_fw_config) && enabled_opp_pipe != opp_pipe) { + DC_LOG_DSC("ERROR: DSC %d at opp pipe %d already 
enabled!", dsc->inst, enabled_opp_pipe); + ASSERT(0); + } + + REG_UPDATE(DSC_TOP_CONTROL, + DSC_CLOCK_EN, 1); + + REG_UPDATE_2(DSCRM_DSC_FORWARD_CONFIG, + DSCRM_DSC_FORWARD_EN, 1, + DSCRM_DSC_OPP_PIPE_SOURCE, opp_pipe); +} + + +static void dsc401_disable(struct display_stream_compressor *dsc) +{ + struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc); + int dsc_clock_en; + int dsc_fw_config; + int enabled_opp_pipe; + + DC_LOG_DSC("disable DSC %d", dsc->inst); + + REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en); + REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &dsc_fw_config, DSCRM_DSC_OPP_PIPE_SOURCE, &enabled_opp_pipe); + if (!dsc_clock_en || !dsc_fw_config) { + DC_LOG_DSC("ERROR: DSC %d at opp pipe %d already disabled!", dsc->inst, enabled_opp_pipe); + ASSERT(0); + } + + REG_UPDATE(DSCRM_DSC_FORWARD_CONFIG, + DSCRM_DSC_FORWARD_EN, 0); + + REG_UPDATE(DSC_TOP_CONTROL, + DSC_CLOCK_EN, 0); +} + +static void dsc401_disconnect(struct display_stream_compressor *dsc) +{ + struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc); + + DC_LOG_DSC("disconnect DSC %d", dsc->inst); + + REG_UPDATE(DSCRM_DSC_FORWARD_CONFIG, + DSCRM_DSC_FORWARD_EN, 0); +} + +/* This module's internal functions */ +//static void dsc_log_pps(struct display_stream_compressor *dsc, struct drm_dsc_config *pps) +//{ +// int i; +// int bits_per_pixel = pps->bits_per_pixel; +// +// DC_LOG_DSC("\tdsc_version_major %d", pps->dsc_version_major); +// DC_LOG_DSC("\tdsc_version_minor %d", pps->dsc_version_minor); +// DC_LOG_DSC("\tbits_per_component %d", pps->bits_per_component); +// DC_LOG_DSC("\tline_buf_depth %d", pps->line_buf_depth); +// DC_LOG_DSC("\tblock_pred_enable %d", pps->block_pred_enable); +// DC_LOG_DSC("\tconvert_rgb %d", pps->convert_rgb); +// DC_LOG_DSC("\tsimple_422 %d", pps->simple_422); +// DC_LOG_DSC("\tvbr_enable %d", pps->vbr_enable); +// DC_LOG_DSC("\tbits_per_pixel %d (%d.%04d)", bits_per_pixel, bits_per_pixel / 16, ((bits_per_pixel % 16) * 10000) / 16); +// DC_LOG_DSC("\tpic_height %d", pps->pic_height); +// DC_LOG_DSC("\tpic_width %d", pps->pic_width); +// DC_LOG_DSC("\tslice_height %d", pps->slice_height); +// DC_LOG_DSC("\tslice_width %d", pps->slice_width); +// DC_LOG_DSC("\tslice_chunk_size %d", pps->slice_chunk_size); +// DC_LOG_DSC("\tinitial_xmit_delay %d", pps->initial_xmit_delay); +// DC_LOG_DSC("\tinitial_dec_delay %d", pps->initial_dec_delay); +// DC_LOG_DSC("\tinitial_scale_value %d", pps->initial_scale_value); +// DC_LOG_DSC("\tscale_increment_interval %d", pps->scale_increment_interval); +// DC_LOG_DSC("\tscale_decrement_interval %d", pps->scale_decrement_interval); +// DC_LOG_DSC("\tfirst_line_bpg_offset %d", pps->first_line_bpg_offset); +// DC_LOG_DSC("\tnfl_bpg_offset %d", pps->nfl_bpg_offset); +// DC_LOG_DSC("\tslice_bpg_offset %d", pps->slice_bpg_offset); +// DC_LOG_DSC("\tinitial_offset %d", pps->initial_offset); +// DC_LOG_DSC("\tfinal_offset %d", pps->final_offset); +// DC_LOG_DSC("\tflatness_min_qp %d", pps->flatness_min_qp); +// DC_LOG_DSC("\tflatness_max_qp %d", pps->flatness_max_qp); +// /* DC_LOG_DSC("\trc_parameter_set %d", pps->rc_parameter_set); */ +// /*DC_LOG_DSC("\tnative_420 %d", pps->native_420); +// DC_LOG_DSC("\tnative_422 %d", pps->native_422); +// DC_LOG_DSC("\tsecond_line_bpg_offset %d", pps->second_line_bpg_offset); +// DC_LOG_DSC("\tnsl_bpg_offset %d", pps->nsl_bpg_offset); +// DC_LOG_DSC("\tsecond_line_offset_adj %d", pps->second_line_offset_adj); +// DC_LOG_DSC("\trc_model_size %d", pps->rc_model_size); +// DC_LOG_DSC("\trc_edge_factor %d", pps->rc_edge_factor); 
+// DC_LOG_DSC("\trc_quant_incr_limit0 %d", pps->rc_quant_incr_limit0); +// DC_LOG_DSC("\trc_quant_incr_limit1 %d", pps->rc_quant_incr_limit1); +// DC_LOG_DSC("\trc_tgt_offset_high %d", pps->rc_tgt_offset_high); +// DC_LOG_DSC("\trc_tgt_offset_low %d", pps->rc_tgt_offset_low); +// +// for (i = 0; i < NUM_BUF_RANGES - 1; i++) +// DC_LOG_DSC("\trc_buf_thresh[%d] %d", i, pps->rc_buf_thresh[i]); +// +// for (i = 0; i < NUM_BUF_RANGES; i++) { +// DC_LOG_DSC("\trc_range_parameters[%d].range_min_qp %d", i, pps->rc_range_params[i].range_min_qp); +// DC_LOG_DSC("\trc_range_parameters[%d].range_max_qp %d", i, pps->rc_range_params[i].range_max_qp); +// DC_LOG_DSC("\trc_range_parameters[%d].range_bpg_offset %d", i, pps->rc_range_params[i].range_bpg_offset); +// } +//} +// +//static void dsc_override_rc_params(struct rc_params *rc, const struct dc_dsc_rc_params_override *override) +//{ +// uint8_t i; +// +// rc->rc_model_size = override->rc_model_size; +// for (i = 0; i < DC_DSC_RC_BUF_THRESH_SIZE; i++) +// rc->rc_buf_thresh[i] = override->rc_buf_thresh[i]; +// for (i = 0; i < DC_DSC_QP_SET_SIZE; i++) { +// rc->qp_min[i] = override->rc_minqp[i]; +// rc->qp_max[i] = override->rc_maxqp[i]; +// rc->ofs[i] = override->rc_offset[i]; +// } +// +// rc->rc_tgt_offset_hi = override->rc_tgt_offset_hi; +// rc->rc_tgt_offset_lo = override->rc_tgt_offset_lo; +// rc->rc_edge_factor = override->rc_edge_factor; +// rc->rc_quant_incr_limit0 = override->rc_quant_incr_limit0; +// rc->rc_quant_incr_limit1 = override->rc_quant_incr_limit1; +// +// rc->initial_fullness_offset = override->initial_fullness_offset; +// rc->initial_xmit_delay = override->initial_delay; +// +// rc->flatness_min_qp = override->flatness_min_qp; +// rc->flatness_max_qp = override->flatness_max_qp; +// rc->flatness_det_thresh = override->flatness_det_thresh; +//} + +// +//static bool dsc_prepare_config(const struct dsc_config *dsc_cfg, struct dsc_reg_values *dsc_reg_vals, +// struct dsc_optc_config *dsc_optc_cfg) +//{ +// struct dsc_parameters dsc_params; +// struct rc_params rc; +// +// /* Validate input parameters */ +// /*ASSERT(dsc_cfg->dc_dsc_cfg.num_slices_h); +// ASSERT(dsc_cfg->dc_dsc_cfg.num_slices_v); +// ASSERT(dsc_cfg->dc_dsc_cfg.version_minor == 1 || dsc_cfg->dc_dsc_cfg.version_minor == 2); +// ASSERT(dsc_cfg->pic_width); +// ASSERT(dsc_cfg->pic_height); +// ASSERT((dsc_cfg->dc_dsc_cfg.version_minor == 1 && +// (8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 13)) || +// (dsc_cfg->dc_dsc_cfg.version_minor == 2 && +// ((8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 15) || +// dsc_cfg->dc_dsc_cfg.linebuf_depth == 0))); +// ASSERT(96 <= dsc_cfg->dc_dsc_cfg.bits_per_pixel && dsc_cfg->dc_dsc_cfg.bits_per_pixel <= 0x3ff); // 6.0 <= bits_per_pixel <= 63.9375 +// +// if (!dsc_cfg->dc_dsc_cfg.num_slices_v || !dsc_cfg->dc_dsc_cfg.num_slices_h || +// !(dsc_cfg->dc_dsc_cfg.version_minor == 1 || dsc_cfg->dc_dsc_cfg.version_minor == 2) || +// !dsc_cfg->pic_width || !dsc_cfg->pic_height || +// !((dsc_cfg->dc_dsc_cfg.version_minor == 1 && // v1.1 line buffer depth range: +// 8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 13) || +// (dsc_cfg->dc_dsc_cfg.version_minor == 2 && // v1.2 line buffer depth range: +// ((8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 15) || +// dsc_cfg->dc_dsc_cfg.linebuf_depth == 0))) || +// !(96 <= dsc_cfg->dc_dsc_cfg.bits_per_pixel && dsc_cfg->dc_dsc_cfg.bits_per_pixel <= 0x3ff)) { +// 
dm_output_to_console("%s: Invalid parameters\n", __func__); +// return false; +// } +// +// dsc_init_reg_values(dsc_reg_vals); +// +// /* Copy input config */ +// /*dsc_reg_vals->pixel_format = dsc_dc_pixel_encoding_to_dsc_pixel_format(dsc_cfg->pixel_encoding, dsc_cfg->dc_dsc_cfg.ycbcr422_simple); +// dsc_reg_vals->num_slices_h = dsc_cfg->dc_dsc_cfg.num_slices_h; +// dsc_reg_vals->num_slices_v = dsc_cfg->dc_dsc_cfg.num_slices_v; +// dsc_reg_vals->pps.dsc_version_minor = dsc_cfg->dc_dsc_cfg.version_minor; +// dsc_reg_vals->pps.pic_width = dsc_cfg->pic_width; +// dsc_reg_vals->pps.pic_height = dsc_cfg->pic_height; +// dsc_reg_vals->pps.bits_per_component = dsc_dc_color_depth_to_dsc_bits_per_comp(dsc_cfg->color_depth); +// dsc_reg_vals->pps.block_pred_enable = dsc_cfg->dc_dsc_cfg.block_pred_enable; +// dsc_reg_vals->pps.line_buf_depth = dsc_cfg->dc_dsc_cfg.linebuf_depth; +// dsc_reg_vals->alternate_ich_encoding_en = dsc_reg_vals->pps.dsc_version_minor == 1 ? 0 : 1; +// dsc_reg_vals->ich_reset_at_eol = (dsc_cfg->is_odm || dsc_reg_vals->num_slices_h > 1) ? 0xF : 0; +// +// // TODO: in addition to validating slice height (pic height must be divisible by slice height), +// // see what happens when the same condition doesn't apply for slice_width/pic_width. +// dsc_reg_vals->pps.slice_width = dsc_cfg->pic_width / dsc_cfg->dc_dsc_cfg.num_slices_h; +// dsc_reg_vals->pps.slice_height = dsc_cfg->pic_height / dsc_cfg->dc_dsc_cfg.num_slices_v; +// +// ASSERT(dsc_reg_vals->pps.slice_height * dsc_cfg->dc_dsc_cfg.num_slices_v == dsc_cfg->pic_height); +// if (!(dsc_reg_vals->pps.slice_height * dsc_cfg->dc_dsc_cfg.num_slices_v == dsc_cfg->pic_height)) { +// dm_output_to_console("%s: pix height %d not divisible by num_slices_v %d\n\n", __func__, dsc_cfg->pic_height, dsc_cfg->dc_dsc_cfg.num_slices_v); +// return false; +// } +// +// dsc_reg_vals->bpp_x32 = dsc_cfg->dc_dsc_cfg.bits_per_pixel << 1; +// if (dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420 || dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422) +// dsc_reg_vals->pps.bits_per_pixel = dsc_reg_vals->bpp_x32; +// else +// dsc_reg_vals->pps.bits_per_pixel = dsc_reg_vals->bpp_x32 >> 1; +// +// dsc_reg_vals->pps.convert_rgb = dsc_reg_vals->pixel_format == DSC_PIXFMT_RGB ? 
1 : 0; +// dsc_reg_vals->pps.native_422 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422); +// dsc_reg_vals->pps.native_420 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420); +// dsc_reg_vals->pps.simple_422 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_SIMPLE_YCBCR422); +// +// calc_rc_params(&rc, &dsc_reg_vals->pps); +// +// if (dsc_cfg->dc_dsc_cfg.rc_params_ovrd) +// dsc_override_rc_params(&rc, dsc_cfg->dc_dsc_cfg.rc_params_ovrd); +// +// if (dscc_compute_dsc_parameters(&dsc_reg_vals->pps, &rc, &dsc_params)) { +// dm_output_to_console("%s: DSC config failed\n", __func__); +// return false; +// } +// +// dsc_update_from_dsc_parameters(dsc_reg_vals, &dsc_params); +// +// dsc_optc_cfg->bytes_per_pixel = dsc_params.bytes_per_pixel; +// dsc_optc_cfg->slice_width = dsc_reg_vals->pps.slice_width; +// dsc_optc_cfg->is_pixel_format_444 = dsc_reg_vals->pixel_format == DSC_PIXFMT_RGB || +// dsc_reg_vals->pixel_format == DSC_PIXFMT_YCBCR444 || +// dsc_reg_vals->pixel_format == DSC_PIXFMT_SIMPLE_YCBCR422; +// +// return true; +//} +//static enum dsc_pixel_format dsc_dc_pixel_encoding_to_dsc_pixel_format(enum dc_pixel_encoding dc_pix_enc, bool is_ycbcr422_simple) +//{ +// enum dsc_pixel_format dsc_pix_fmt = DSC_PIXFMT_UNKNOWN; +// +// /* NOTE: We don't support DSC_PIXFMT_SIMPLE_YCBCR422 */ +// +// /*switch (dc_pix_enc) { +// case PIXEL_ENCODING_RGB: +// dsc_pix_fmt = DSC_PIXFMT_RGB; +// break; +// case PIXEL_ENCODING_YCBCR422: +// if (is_ycbcr422_simple) +// dsc_pix_fmt = DSC_PIXFMT_SIMPLE_YCBCR422; +// else +// dsc_pix_fmt = DSC_PIXFMT_NATIVE_YCBCR422; +// break; +// case PIXEL_ENCODING_YCBCR444: +// dsc_pix_fmt = DSC_PIXFMT_YCBCR444; +// break; +// case PIXEL_ENCODING_YCBCR420: +// dsc_pix_fmt = DSC_PIXFMT_NATIVE_YCBCR420; +// break; +// default: +// dsc_pix_fmt = DSC_PIXFMT_UNKNOWN; +// break; +// } +// +// ASSERT(dsc_pix_fmt != DSC_PIXFMT_UNKNOWN); +// return dsc_pix_fmt; +//} +//static enum dsc_bits_per_comp dsc_dc_color_depth_to_dsc_bits_per_comp(enum dc_color_depth dc_color_depth) +//{ +// enum dsc_bits_per_comp bpc = DSC_BPC_UNKNOWN; +// +// switch (dc_color_depth) { +// case COLOR_DEPTH_888: +// bpc = DSC_BPC_8; +// break; +// case COLOR_DEPTH_101010: +// bpc = DSC_BPC_10; +// break; +// case COLOR_DEPTH_121212: +// bpc = DSC_BPC_12; +// break; +// default: +// bpc = DSC_BPC_UNKNOWN; +// break; +// } +// +// return bpc; +//} +//static void dsc_init_reg_values(struct dsc_reg_values *reg_vals) +//{ +// int i; +// +// memset(reg_vals, 0, sizeof(struct dsc_reg_values)); +// +// /* Non-PPS values */ +// /*reg_vals->dsc_clock_enable = 1; +// reg_vals->dsc_clock_gating_disable = 0; +// reg_vals->underflow_recovery_en = 0; +// reg_vals->underflow_occurred_int_en = 0; +// reg_vals->underflow_occurred_status = 0; +// reg_vals->ich_reset_at_eol = 0; +// reg_vals->alternate_ich_encoding_en = 0; +// reg_vals->rc_buffer_model_size = 0; +// /*reg_vals->disable_ich = 0;*/ +// /*reg_vals->dsc_dbg_en = 0; +// +// for (i = 0; i < 4; i++) +// reg_vals->rc_buffer_model_overflow_int_en[i] = 0; +// +// /* PPS values */ +// /*reg_vals->pps.dsc_version_minor = 2; +// reg_vals->pps.dsc_version_major = 1; +// reg_vals->pps.line_buf_depth = 9; +// reg_vals->pps.bits_per_component = 8; +// reg_vals->pps.block_pred_enable = 1; +// reg_vals->pps.slice_chunk_size = 0; +// reg_vals->pps.pic_width = 0; +// reg_vals->pps.pic_height = 0; +// reg_vals->pps.slice_width = 0; +// reg_vals->pps.slice_height = 0; +// reg_vals->pps.initial_xmit_delay = 170; +// reg_vals->pps.initial_dec_delay = 0; +// 
reg_vals->pps.initial_scale_value = 0; +// reg_vals->pps.scale_increment_interval = 0; +// reg_vals->pps.scale_decrement_interval = 0; +// reg_vals->pps.nfl_bpg_offset = 0; +// reg_vals->pps.slice_bpg_offset = 0; +// reg_vals->pps.nsl_bpg_offset = 0; +// reg_vals->pps.initial_offset = 6144; +// reg_vals->pps.final_offset = 0; +// reg_vals->pps.flatness_min_qp = 3; +// reg_vals->pps.flatness_max_qp = 12; +// reg_vals->pps.rc_model_size = 8192; +// reg_vals->pps.rc_edge_factor = 6; +// reg_vals->pps.rc_quant_incr_limit0 = 11; +// reg_vals->pps.rc_quant_incr_limit1 = 11; +// reg_vals->pps.rc_tgt_offset_low = 3; +// reg_vals->pps.rc_tgt_offset_high = 3; +//} +/* Updates dsc_reg_values::reg_vals::xxx fields based on the values from computed params. + * This is required because dscc_compute_dsc_parameters returns a modified PPS, which in turn + * affects non-PPS register values. + */ +//static void dsc_update_from_dsc_parameters(struct dsc_reg_values *reg_vals, const struct dsc_parameters *dsc_params) +//{ +// int i; +// +// reg_vals->pps = dsc_params->pps; +// +// // pps_computed will have the "expanded" values; need to shift them to make them fit for regs. +// for (i = 0; i < NUM_BUF_RANGES - 1; i++) +// reg_vals->pps.rc_buf_thresh[i] = reg_vals->pps.rc_buf_thresh[i] >> 6; +// +// reg_vals->rc_buffer_model_size = dsc_params->rc_buffer_model_size; +//} +static void dsc_write_to_registers(struct display_stream_compressor *dsc, const struct dsc_reg_values *reg_vals) +{ + uint32_t temp_int; + struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc); + + REG_SET(DSC_DEBUG_CONTROL, 0, + DSC_DBG_EN, reg_vals->dsc_dbg_en); + + // dsccif registers + REG_SET_2(DSCCIF_CONFIG0, 0, + //INPUT_INTERFACE_UNDERFLOW_RECOVERY_EN, reg_vals->underflow_recovery_en, + //INPUT_INTERFACE_UNDERFLOW_OCCURRED_INT_EN, reg_vals->underflow_occurred_int_en, + //INPUT_INTERFACE_UNDERFLOW_OCCURRED_STATUS, reg_vals->underflow_occurred_status, + INPUT_PIXEL_FORMAT, reg_vals->pixel_format, + DSCCIF_CONFIG0__BITS_PER_COMPONENT, reg_vals->pps.bits_per_component); + + /* REG_SET_2(DSCCIF_CONFIG1, 0, + PIC_WIDTH, reg_vals->pps.pic_width, + PIC_HEIGHT, reg_vals->pps.pic_height); + */ + // dscc registers + if (dsc401->dsc_mask->ICH_RESET_AT_END_OF_LINE == 0) { + REG_SET_3(DSCC_CONFIG0, 0, + NUMBER_OF_SLICES_PER_LINE, reg_vals->num_slices_h - 1, + ALTERNATE_ICH_ENCODING_EN, reg_vals->alternate_ich_encoding_en, + NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION, reg_vals->num_slices_v - 1); + } else { + REG_SET_4(DSCC_CONFIG0, 0, ICH_RESET_AT_END_OF_LINE, + reg_vals->ich_reset_at_eol, NUMBER_OF_SLICES_PER_LINE, + reg_vals->num_slices_h - 1, ALTERNATE_ICH_ENCODING_EN, + reg_vals->alternate_ich_encoding_en, NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION, + reg_vals->num_slices_v - 1); + } + + REG_SET(DSCC_CONFIG1, 0, + DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, reg_vals->rc_buffer_model_size); + /*REG_SET_2(DSCC_CONFIG1, 0, + DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, reg_vals->rc_buffer_model_size, + DSCC_DISABLE_ICH, reg_vals->disable_ich);*/ + + REG_SET_4(DSCC_INTERRUPT_CONTROL0, 0, + DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN0, reg_vals->rc_buffer_model_overflow_int_en[0], + DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN1, reg_vals->rc_buffer_model_overflow_int_en[1], + DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN2, reg_vals->rc_buffer_model_overflow_int_en[2], + DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN3, reg_vals->rc_buffer_model_overflow_int_en[3]); + + REG_SET_3(DSCC_PPS_CONFIG0, 0, + DSC_VERSION_MINOR, 
reg_vals->pps.dsc_version_minor, + LINEBUF_DEPTH, reg_vals->pps.line_buf_depth, + DSCC_PPS_CONFIG0__BITS_PER_COMPONENT, reg_vals->pps.bits_per_component); + + if (reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420 || reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422) + temp_int = reg_vals->bpp_x32; + else + temp_int = reg_vals->bpp_x32 >> 1; + + REG_SET_7(DSCC_PPS_CONFIG1, 0, + BITS_PER_PIXEL, temp_int, + SIMPLE_422, reg_vals->pixel_format == DSC_PIXFMT_SIMPLE_YCBCR422, + CONVERT_RGB, reg_vals->pixel_format == DSC_PIXFMT_RGB, + BLOCK_PRED_ENABLE, reg_vals->pps.block_pred_enable, + NATIVE_422, reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422, + NATIVE_420, reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420, + CHUNK_SIZE, reg_vals->pps.slice_chunk_size); + + REG_SET_2(DSCC_PPS_CONFIG2, 0, + PIC_WIDTH, reg_vals->pps.pic_width, + PIC_HEIGHT, reg_vals->pps.pic_height); + + REG_SET_2(DSCC_PPS_CONFIG3, 0, + SLICE_WIDTH, reg_vals->pps.slice_width, + SLICE_HEIGHT, reg_vals->pps.slice_height); + + REG_SET(DSCC_PPS_CONFIG4, 0, + INITIAL_XMIT_DELAY, reg_vals->pps.initial_xmit_delay); + + REG_SET_2(DSCC_PPS_CONFIG5, 0, + INITIAL_SCALE_VALUE, reg_vals->pps.initial_scale_value, + SCALE_INCREMENT_INTERVAL, reg_vals->pps.scale_increment_interval); + + REG_SET_3(DSCC_PPS_CONFIG6, 0, + SCALE_DECREMENT_INTERVAL, reg_vals->pps.scale_decrement_interval, + FIRST_LINE_BPG_OFFSET, reg_vals->pps.first_line_bpg_offset, + SECOND_LINE_BPG_OFFSET, reg_vals->pps.second_line_bpg_offset); + + REG_SET_2(DSCC_PPS_CONFIG7, 0, + NFL_BPG_OFFSET, reg_vals->pps.nfl_bpg_offset, + SLICE_BPG_OFFSET, reg_vals->pps.slice_bpg_offset); + + REG_SET_2(DSCC_PPS_CONFIG8, 0, + NSL_BPG_OFFSET, reg_vals->pps.nsl_bpg_offset, + SECOND_LINE_OFFSET_ADJ, reg_vals->pps.second_line_offset_adj); + + REG_SET_2(DSCC_PPS_CONFIG9, 0, + INITIAL_OFFSET, reg_vals->pps.initial_offset, + FINAL_OFFSET, reg_vals->pps.final_offset); + + REG_SET_3(DSCC_PPS_CONFIG10, 0, + FLATNESS_MIN_QP, reg_vals->pps.flatness_min_qp, + FLATNESS_MAX_QP, reg_vals->pps.flatness_max_qp, + RC_MODEL_SIZE, reg_vals->pps.rc_model_size); + + REG_SET_5(DSCC_PPS_CONFIG11, 0, + RC_EDGE_FACTOR, reg_vals->pps.rc_edge_factor, + RC_QUANT_INCR_LIMIT0, reg_vals->pps.rc_quant_incr_limit0, + RC_QUANT_INCR_LIMIT1, reg_vals->pps.rc_quant_incr_limit1, + RC_TGT_OFFSET_LO, reg_vals->pps.rc_tgt_offset_low, + RC_TGT_OFFSET_HI, reg_vals->pps.rc_tgt_offset_high); + + REG_SET_4(DSCC_PPS_CONFIG12, 0, + RC_BUF_THRESH0, reg_vals->pps.rc_buf_thresh[0], + RC_BUF_THRESH1, reg_vals->pps.rc_buf_thresh[1], + RC_BUF_THRESH2, reg_vals->pps.rc_buf_thresh[2], + RC_BUF_THRESH3, reg_vals->pps.rc_buf_thresh[3]); + + REG_SET_4(DSCC_PPS_CONFIG13, 0, + RC_BUF_THRESH4, reg_vals->pps.rc_buf_thresh[4], + RC_BUF_THRESH5, reg_vals->pps.rc_buf_thresh[5], + RC_BUF_THRESH6, reg_vals->pps.rc_buf_thresh[6], + RC_BUF_THRESH7, reg_vals->pps.rc_buf_thresh[7]); + + REG_SET_4(DSCC_PPS_CONFIG14, 0, + RC_BUF_THRESH8, reg_vals->pps.rc_buf_thresh[8], + RC_BUF_THRESH9, reg_vals->pps.rc_buf_thresh[9], + RC_BUF_THRESH10, reg_vals->pps.rc_buf_thresh[10], + RC_BUF_THRESH11, reg_vals->pps.rc_buf_thresh[11]); + + REG_SET_5(DSCC_PPS_CONFIG15, 0, + RC_BUF_THRESH12, reg_vals->pps.rc_buf_thresh[12], + RC_BUF_THRESH13, reg_vals->pps.rc_buf_thresh[13], + RANGE_MIN_QP0, reg_vals->pps.rc_range_params[0].range_min_qp, + RANGE_MAX_QP0, reg_vals->pps.rc_range_params[0].range_max_qp, + RANGE_BPG_OFFSET0, reg_vals->pps.rc_range_params[0].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG16, 0, + RANGE_MIN_QP1, reg_vals->pps.rc_range_params[1].range_min_qp, 
+ RANGE_MAX_QP1, reg_vals->pps.rc_range_params[1].range_max_qp, + RANGE_BPG_OFFSET1, reg_vals->pps.rc_range_params[1].range_bpg_offset, + RANGE_MIN_QP2, reg_vals->pps.rc_range_params[2].range_min_qp, + RANGE_MAX_QP2, reg_vals->pps.rc_range_params[2].range_max_qp, + RANGE_BPG_OFFSET2, reg_vals->pps.rc_range_params[2].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG17, 0, + RANGE_MIN_QP3, reg_vals->pps.rc_range_params[3].range_min_qp, + RANGE_MAX_QP3, reg_vals->pps.rc_range_params[3].range_max_qp, + RANGE_BPG_OFFSET3, reg_vals->pps.rc_range_params[3].range_bpg_offset, + RANGE_MIN_QP4, reg_vals->pps.rc_range_params[4].range_min_qp, + RANGE_MAX_QP4, reg_vals->pps.rc_range_params[4].range_max_qp, + RANGE_BPG_OFFSET4, reg_vals->pps.rc_range_params[4].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG18, 0, + RANGE_MIN_QP5, reg_vals->pps.rc_range_params[5].range_min_qp, + RANGE_MAX_QP5, reg_vals->pps.rc_range_params[5].range_max_qp, + RANGE_BPG_OFFSET5, reg_vals->pps.rc_range_params[5].range_bpg_offset, + RANGE_MIN_QP6, reg_vals->pps.rc_range_params[6].range_min_qp, + RANGE_MAX_QP6, reg_vals->pps.rc_range_params[6].range_max_qp, + RANGE_BPG_OFFSET6, reg_vals->pps.rc_range_params[6].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG19, 0, + RANGE_MIN_QP7, reg_vals->pps.rc_range_params[7].range_min_qp, + RANGE_MAX_QP7, reg_vals->pps.rc_range_params[7].range_max_qp, + RANGE_BPG_OFFSET7, reg_vals->pps.rc_range_params[7].range_bpg_offset, + RANGE_MIN_QP8, reg_vals->pps.rc_range_params[8].range_min_qp, + RANGE_MAX_QP8, reg_vals->pps.rc_range_params[8].range_max_qp, + RANGE_BPG_OFFSET8, reg_vals->pps.rc_range_params[8].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG20, 0, + RANGE_MIN_QP9, reg_vals->pps.rc_range_params[9].range_min_qp, + RANGE_MAX_QP9, reg_vals->pps.rc_range_params[9].range_max_qp, + RANGE_BPG_OFFSET9, reg_vals->pps.rc_range_params[9].range_bpg_offset, + RANGE_MIN_QP10, reg_vals->pps.rc_range_params[10].range_min_qp, + RANGE_MAX_QP10, reg_vals->pps.rc_range_params[10].range_max_qp, + RANGE_BPG_OFFSET10, reg_vals->pps.rc_range_params[10].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG21, 0, + RANGE_MIN_QP11, reg_vals->pps.rc_range_params[11].range_min_qp, + RANGE_MAX_QP11, reg_vals->pps.rc_range_params[11].range_max_qp, + RANGE_BPG_OFFSET11, reg_vals->pps.rc_range_params[11].range_bpg_offset, + RANGE_MIN_QP12, reg_vals->pps.rc_range_params[12].range_min_qp, + RANGE_MAX_QP12, reg_vals->pps.rc_range_params[12].range_max_qp, + RANGE_BPG_OFFSET12, reg_vals->pps.rc_range_params[12].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG22, 0, + RANGE_MIN_QP13, reg_vals->pps.rc_range_params[13].range_min_qp, + RANGE_MAX_QP13, reg_vals->pps.rc_range_params[13].range_max_qp, + RANGE_BPG_OFFSET13, reg_vals->pps.rc_range_params[13].range_bpg_offset, + RANGE_MIN_QP14, reg_vals->pps.rc_range_params[14].range_min_qp, + RANGE_MAX_QP14, reg_vals->pps.rc_range_params[14].range_max_qp, + RANGE_BPG_OFFSET14, reg_vals->pps.rc_range_params[14].range_bpg_offset); +} + +void dsc401_set_fgcg(struct dcn401_dsc *dsc401, bool enable) +{ + REG_UPDATE(DSC_TOP_CONTROL, DSC_FGCG_REP_DIS, !enable); +} diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h new file mode 100644 index 000000000000..2143e81ca22a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
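Illustrative aside, not part of the patch: the REG_SET_*() calls in dsc_write_to_registers() above are driven by per-field shift/mask tables that the header below generates via DSC_REG_LIST_SH_MASK_DCN401(__SHIFT) and (_MASK). A minimal standalone sketch of that shift/mask packing follows; the field positions are invented (the real ones come from dcn_4_1_0_sh_mask.h), while the example values match the PPS defaults in dsc_init_reg_values() (flatness QP 3/12, rc_model_size 8192).

/* Standalone model of shift/mask field packing; hypothetical layout. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct field { uint32_t shift; uint32_t mask; };

/* invented field positions for a DSCC_PPS_CONFIG10-style register */
static const struct field FLATNESS_MIN_QP = {  0, 0x0000001f };
static const struct field FLATNESS_MAX_QP = {  8, 0x00001f00 };
static const struct field RC_MODEL_SIZE   = { 16, 0xffff0000 };

static uint32_t field_set(uint32_t reg, struct field f, uint32_t val)
{
	/* read-modify-write of one field, as a REG_SET/REG_UPDATE would do */
	return (reg & ~f.mask) | ((val << f.shift) & f.mask);
}

static uint32_t field_get(uint32_t reg, struct field f)
{
	return (reg & f.mask) >> f.shift;
}

int main(void)
{
	uint32_t reg = 0;

	reg = field_set(reg, FLATNESS_MIN_QP, 3);
	reg = field_set(reg, FLATNESS_MAX_QP, 12);
	reg = field_set(reg, RC_MODEL_SIZE, 8192);

	assert(field_get(reg, FLATNESS_MIN_QP) == 3);
	assert(field_get(reg, FLATNESS_MAX_QP) == 12);
	assert(field_get(reg, RC_MODEL_SIZE) == 8192);
	printf("packed register: 0x%08x\n", reg);
	return 0;
}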
+ +#ifndef __DCN401_DSC_H__ +#define __DCN401_DSC_H__ + +#include "dsc.h" +#include "dsc/dscc_types.h" +#include "dcn20/dcn20_dsc.h" +#include <drm/display/drm_dsc.h> + +#define TO_DCN401_DSC(dsc)\ + container_of(dsc, struct dcn401_dsc, base) + +#define DSC_REG_LIST_SH_MASK_DCN401(mask_sh)\ + DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_CLOCK_EN, mask_sh), \ + DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_DISPCLK_R_GATE_DIS, mask_sh), \ + DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_DSCCLK_R_GATE_DIS, mask_sh), \ + DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_FGCG_REP_DIS, mask_sh), \ + DSC_SF(DSC_TOP0_DSC_DEBUG_CONTROL, DSC_DBG_EN, mask_sh), \ + DSC_SF(DSC_TOP0_DSC_DEBUG_CONTROL, DSC_TEST_CLOCK_MUX_SEL, mask_sh), \ + DSC_SF(DSCC0_DSCC_CONFIG0, ICH_RESET_AT_END_OF_LINE, mask_sh), \ + DSC_SF(DSCC0_DSCC_CONFIG0, NUMBER_OF_SLICES_PER_LINE, mask_sh), \ + DSC_SF(DSCC0_DSCC_CONFIG0, ALTERNATE_ICH_ENCODING_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_CONFIG0, NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION, mask_sh), \ + DSC_SF(DSCC0_DSCC_CONFIG1, DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, mask_sh), \ + /*DSC_SF(DSCC0_DSCC_CONFIG1, DSCC_DISABLE_ICH, mask_sh),*/ \ + DSC_SF(DSCC0_DSCC_STATUS, DSCC_DOUBLE_BUFFER_REG_UPDATE_PENDING, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED0, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED1, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED2, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED3, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR0, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR1, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR2, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR3, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN0, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN1, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN2, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN3, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED0, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED1, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED2, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED3, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED0, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED1, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED2, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED3, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_END_OF_FRAME_NOT_REACHED_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR0, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR1, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR2, mask_sh), \ + 
DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR3, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR0, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR1, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR2, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR3, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_END_OF_FRAME_NOT_REACHED_CLEAR, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN0, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN1, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN2, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN3, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN0, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN1, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN2, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN3, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_END_OF_FRAME_NOT_REACHED_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG0, DSC_VERSION_MINOR, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG0, DSC_VERSION_MAJOR, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG0, PPS_IDENTIFIER, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG0, LINEBUF_DEPTH, mask_sh), \ + DSC2_SF(DSCC0, DSCC_PPS_CONFIG0__BITS_PER_COMPONENT, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, BITS_PER_PIXEL, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, VBR_ENABLE, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, SIMPLE_422, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, CONVERT_RGB, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, BLOCK_PRED_ENABLE, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, NATIVE_422, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, NATIVE_420, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, CHUNK_SIZE, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG2, PIC_WIDTH, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG2, PIC_HEIGHT, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG3, SLICE_WIDTH, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG3, SLICE_HEIGHT, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG4, INITIAL_XMIT_DELAY, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG4, INITIAL_DEC_DELAY, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG5, INITIAL_SCALE_VALUE, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG5, SCALE_INCREMENT_INTERVAL, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG6, SCALE_DECREMENT_INTERVAL, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG6, FIRST_LINE_BPG_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG6, SECOND_LINE_BPG_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG7, NFL_BPG_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG7, SLICE_BPG_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG8, NSL_BPG_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG8, SECOND_LINE_OFFSET_ADJ, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG9, INITIAL_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG9, FINAL_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG10, FLATNESS_MIN_QP, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG10, FLATNESS_MAX_QP, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG10, RC_MODEL_SIZE, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_EDGE_FACTOR, mask_sh), \ + 
DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_QUANT_INCR_LIMIT0, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_QUANT_INCR_LIMIT1, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_TGT_OFFSET_LO, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_TGT_OFFSET_HI, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH0, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH1, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH2, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH3, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH4, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH5, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH6, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH7, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH8, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH9, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH10, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH11, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RC_BUF_THRESH12, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RC_BUF_THRESH13, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_MIN_QP0, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_MAX_QP0, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_BPG_OFFSET0, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MIN_QP1, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MAX_QP1, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_BPG_OFFSET1, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MIN_QP2, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MAX_QP2, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_BPG_OFFSET2, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MIN_QP3, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MAX_QP3, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_BPG_OFFSET3, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MIN_QP4, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MAX_QP4, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_BPG_OFFSET4, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MIN_QP5, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MAX_QP5, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_BPG_OFFSET5, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MIN_QP6, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MAX_QP6, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_BPG_OFFSET6, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MIN_QP7, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MAX_QP7, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_BPG_OFFSET7, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MIN_QP8, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MAX_QP8, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_BPG_OFFSET8, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MIN_QP9, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MAX_QP9, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_BPG_OFFSET9, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MIN_QP10, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MAX_QP10, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_BPG_OFFSET10, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MIN_QP11, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MAX_QP11, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_BPG_OFFSET11, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MIN_QP12, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MAX_QP12, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, 
RANGE_BPG_OFFSET12, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MIN_QP13, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MAX_QP13, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_BPG_OFFSET13, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MIN_QP14, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MAX_QP14, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_BPG_OFFSET14, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL0, DSCC_DEFAULT_MEM_LOW_POWER_STATE, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL0, DSCC_MEM_PWR_FORCE, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL0, DSCC_MEM_PWR_DIS, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL0, DSCC_MEM_PWR_STATE, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL1, DSCC_DEFAULT_MEM_LOW_POWER_STATE, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL1, DSCC_MEM_PWR_FORCE, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL1, DSCC_MEM_PWR_DIS, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL1, DSCC_MEM_PWR_STATE, mask_sh), \ + DSC_SF(DSCC0_DSCC_R_Y_SQUARED_ERROR_LOWER, DSCC_R_Y_SQUARED_ERROR_LOWER, mask_sh), \ + DSC_SF(DSCC0_DSCC_R_Y_SQUARED_ERROR_UPPER, DSCC_R_Y_SQUARED_ERROR_UPPER, mask_sh), \ + DSC_SF(DSCC0_DSCC_G_CB_SQUARED_ERROR_LOWER, DSCC_G_CB_SQUARED_ERROR_LOWER, mask_sh), \ + DSC_SF(DSCC0_DSCC_G_CB_SQUARED_ERROR_UPPER, DSCC_G_CB_SQUARED_ERROR_UPPER, mask_sh), \ + DSC_SF(DSCC0_DSCC_B_CR_SQUARED_ERROR_LOWER, DSCC_B_CR_SQUARED_ERROR_LOWER, mask_sh), \ + DSC_SF(DSCC0_DSCC_B_CR_SQUARED_ERROR_UPPER, DSCC_B_CR_SQUARED_ERROR_UPPER, mask_sh), \ + DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR0, DSCC_R_Y_MAX_ABS_ERROR, mask_sh), \ + DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR0, DSCC_G_CB_MAX_ABS_ERROR, mask_sh), \ + DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR1, DSCC_B_CR_MAX_ABS_ERROR, mask_sh), \ + DSC_SF(DSCC0_DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL0, DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL0, mask_sh), \ + DSC_SF(DSCC0_DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL1, DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL1, mask_sh), \ + DSC_SF(DSCC0_DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL2, DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL2, mask_sh), \ + DSC_SF(DSCC0_DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL3, DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL3, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL0, DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL0, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL1, DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL1, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL2, DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL2, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL3, DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL3, mask_sh), \ + DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS0_ROTATE, mask_sh), \ + DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS1_ROTATE, mask_sh), \ + DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS2_ROTATE, mask_sh), \ + DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS3_ROTATE, mask_sh), \ + DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_PIXEL_FORMAT, mask_sh), \ + DSC2_SF(DSCCIF0, DSCCIF_CONFIG0__BITS_PER_COMPONENT, mask_sh), \ + DSC_SF(DSCCIF0_DSCCIF_CONFIG0, DOUBLE_BUFFER_REG_UPDATE_PENDING, mask_sh), \ + DSC_SF(DSCRM0_DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, mask_sh), \ + DSC_SF(DSCRM0_DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_OPP_PIPE_SOURCE, mask_sh) + +struct dcn401_dsc_registers { + uint32_t DSC_TOP_CONTROL; + uint32_t DSC_DEBUG_CONTROL; + uint32_t DSCC_CONFIG0; + uint32_t DSCC_CONFIG1; + uint32_t DSCC_STATUS; + uint32_t DSCC_INTERRUPT_CONTROL0; + 
uint32_t DSCC_INTERRUPT_CONTROL1; + uint32_t DSCC_INTERRUPT_STATUS0; + uint32_t DSCC_INTERRUPT_STATUS1; + uint32_t DSCC_PPS_CONFIG0; + uint32_t DSCC_PPS_CONFIG1; + uint32_t DSCC_PPS_CONFIG2; + uint32_t DSCC_PPS_CONFIG3; + uint32_t DSCC_PPS_CONFIG4; + uint32_t DSCC_PPS_CONFIG5; + uint32_t DSCC_PPS_CONFIG6; + uint32_t DSCC_PPS_CONFIG7; + uint32_t DSCC_PPS_CONFIG8; + uint32_t DSCC_PPS_CONFIG9; + uint32_t DSCC_PPS_CONFIG10; + uint32_t DSCC_PPS_CONFIG11; + uint32_t DSCC_PPS_CONFIG12; + uint32_t DSCC_PPS_CONFIG13; + uint32_t DSCC_PPS_CONFIG14; + uint32_t DSCC_PPS_CONFIG15; + uint32_t DSCC_PPS_CONFIG16; + uint32_t DSCC_PPS_CONFIG17; + uint32_t DSCC_PPS_CONFIG18; + uint32_t DSCC_PPS_CONFIG19; + uint32_t DSCC_PPS_CONFIG20; + uint32_t DSCC_PPS_CONFIG21; + uint32_t DSCC_PPS_CONFIG22; + uint32_t DSCC_MEM_POWER_CONTROL0; + uint32_t DSCC_MEM_POWER_CONTROL1; + uint32_t DSCC_R_Y_SQUARED_ERROR_LOWER; + uint32_t DSCC_R_Y_SQUARED_ERROR_UPPER; + uint32_t DSCC_G_CB_SQUARED_ERROR_LOWER; + uint32_t DSCC_G_CB_SQUARED_ERROR_UPPER; + uint32_t DSCC_B_CR_SQUARED_ERROR_LOWER; + uint32_t DSCC_B_CR_SQUARED_ERROR_UPPER; + uint32_t DSCC_MAX_ABS_ERROR0; + uint32_t DSCC_MAX_ABS_ERROR1; + uint32_t DSCC_TEST_DEBUG_BUS_ROTATE; + uint32_t DSCCIF_CONFIG0; + uint32_t DSCRM_DSC_FORWARD_CONFIG; + uint32_t DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL0; + uint32_t DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL1; + uint32_t DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL2; + uint32_t DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL3; + uint32_t DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL0; + uint32_t DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL1; + uint32_t DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL2; + uint32_t DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL3; +}; + +#define DSC_FIELD_LIST_DCN401(type)\ + DSC_FIELD_LIST_DCN20(type); \ + type DSC_FGCG_REP_DIS; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN0; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN1; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN2; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN3; \ + type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED0; \ + type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED1; \ + type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED2; \ + type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED3; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED0; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED1; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED2; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED3; \ + type DSCC_END_OF_FRAME_NOT_REACHED_OCCURRED; \ + type DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR0; \ + type DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR1; \ + type DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR2; \ + type DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR3; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR0; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR1; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR2; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR3; \ + type DSCC_END_OF_FRAME_NOT_REACHED_CLEAR; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED0; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED1; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED2; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED3; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR0; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR1; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR2; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR3; \ + type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN0; \ + type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN1; \ + type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN2; \ + type 
DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN3; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN0; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN1; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN2; \ + type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN3; \ + type DSCC_END_OF_FRAME_NOT_REACHED_OCCURRED_INT_EN; \ + type DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL0; \ + type DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL1; \ + type DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL2; \ + type DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL3; \ + type DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL0; \ + type DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL1; \ + type DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL2; \ + type DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL3 + +struct dcn401_dsc_shift { + DSC_FIELD_LIST_DCN401(uint8_t); +}; + +struct dcn401_dsc_mask { + DSC_FIELD_LIST_DCN401(uint32_t); +}; + +struct dcn401_dsc { + struct display_stream_compressor base; + const struct dcn401_dsc_registers *dsc_regs; + const struct dcn401_dsc_shift *dsc_shift; + const struct dcn401_dsc_mask *dsc_mask; + + struct dsc_reg_values reg_vals; + + int max_image_width; +}; + +void dsc401_construct(struct dcn401_dsc *dsc, + struct dc_context *ctx, + int inst, + const struct dcn401_dsc_registers *dsc_regs, + const struct dcn401_dsc_shift *dsc_shift, + const struct dcn401_dsc_mask *dsc_mask); + +void dsc401_set_fgcg(struct dcn401_dsc *dsc401, bool enable); + +#endif + diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c new file mode 100644 index 000000000000..46415cab23ab --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
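Illustrative aside, not part of the patch: the GPIO factory and translate files below compose register addresses with token-pasting BASE()/REG() macros, where BASE() wraps BASE_INNER() so the segment index argument is macro-expanded before it is pasted onto DCN_BASE__INST0_SEG. A standalone sketch of that expansion; DCN_BASE__INST0_SEG2 matches the value used in these files, while the register offset and _BASE_IDX are invented stand-ins for dcn_4_1_0_offset.h.

/* Standalone model of the BASE()/REG() address composition. */
#include <stdio.h>

#define DCN_BASE__INST0_SEG2		0x000034C0

/* hypothetical register description, normally from dcn_4_1_0_offset.h */
#define regDC_GPIO_HPD_A		0x1234
#define regDC_GPIO_HPD_A_BASE_IDX	2

/* two-level indirection: the argument of BASE() expands to 2 first,
 * then BASE_INNER() pastes it onto DCN_BASE__INST0_SEG */
#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
#define BASE(seg)	BASE_INNER(seg)

#define REG(reg_name) \
	(BASE(reg ## reg_name ## _BASE_IDX) + reg ## reg_name)

int main(void)
{
	/* expands to DCN_BASE__INST0_SEG2 + regDC_GPIO_HPD_A = 0x46F4 */
	printf("DC_GPIO_HPD_A at %#06x\n", (unsigned int)REG(DC_GPIO_HPD_A));
	return 0;
}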
+ +#include "dm_services.h" +#include "include/gpio_types.h" +#include "../hw_factory.h" + + +#include "../hw_gpio.h" +#include "../hw_ddc.h" +#include "../hw_hpd.h" +#include "../hw_generic.h" + + +#include "dcn/dcn_4_1_0_offset.h" +#include "dcn/dcn_4_1_0_sh_mask.h" + +#include "reg_helper.h" +#include "../hpd_regs.h" +#include "hw_factory_dcn401.h" + +#define DCN_BASE__INST0_SEG2 0x000034C0 + +/* begin ********************* + * macros to expend register list macro defined in HW object header file */ + +/* DCN */ +#define block HPD +#define reg_num 0 + +#undef BASE_INNER +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + + + +#define REG(reg_name)\ + BASE(reg ## reg_name ## _BASE_IDX) + reg ## reg_name + +#define SF_HPD(reg_name, field_name, post_fix)\ + .field_name = HPD0_ ## reg_name ## __ ## field_name ## post_fix + +#define REGI(reg_name, block, id)\ + BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SF(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +/* macros to expend register list macro defined in HW object header file + * end *********************/ + + + +#define hpd_regs(id) \ +{\ + HPD_REG_LIST(id)\ +} + +static const struct hpd_registers hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), +// hpd_regs(4), +}; + +static const struct hpd_sh_mask hpd_shift = { + HPD_MASK_SH_LIST(__SHIFT) +}; + +static const struct hpd_sh_mask hpd_mask = { + HPD_MASK_SH_LIST(_MASK) +}; + +#include "../ddc_regs.h" + + /* set field name */ +#define SF_DDC(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +static const struct ddc_registers ddc_data_regs_dcn[] = { + ddc_data_regs_dcn2(1), + ddc_data_regs_dcn2(2), + ddc_data_regs_dcn2(3), + ddc_data_regs_dcn2(4), +// ddc_data_regs_dcn2(5), + { + // add a dummy entry for cases no such port + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + }, + { + DDC_GPIO_VGA_REG_LIST(DATA), + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + } +}; + +static const struct ddc_registers ddc_clk_regs_dcn[] = { + ddc_clk_regs_dcn2(1), + ddc_clk_regs_dcn2(2), + ddc_clk_regs_dcn2(3), + ddc_clk_regs_dcn2(4), +// ddc_clk_regs_dcn2(5), + { + // add a dummy entry for cases no such port + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + }, + { + DDC_GPIO_VGA_REG_LIST(CLK), + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + } +}; + +static const struct ddc_sh_mask ddc_shift[] = { + DDC_MASK_SH_LIST_DCN2(__SHIFT, 1), + DDC_MASK_SH_LIST_DCN2(__SHIFT, 2), + DDC_MASK_SH_LIST_DCN2(__SHIFT, 3), + DDC_MASK_SH_LIST_DCN2(__SHIFT, 4), + DDC_MASK_SH_LIST_DCN2(__SHIFT, 5), + DDC_MASK_SH_LIST_DCN2(__SHIFT, 6), + DDC_MASK_SH_LIST_DCN2_VGA(__SHIFT) +}; + +static const struct ddc_sh_mask ddc_mask[] = { + DDC_MASK_SH_LIST_DCN2(_MASK, 1), + DDC_MASK_SH_LIST_DCN2(_MASK, 2), + DDC_MASK_SH_LIST_DCN2(_MASK, 3), + DDC_MASK_SH_LIST_DCN2(_MASK, 4), + DDC_MASK_SH_LIST_DCN2(_MASK, 5), + DDC_MASK_SH_LIST_DCN2(_MASK, 6), + DDC_MASK_SH_LIST_DCN2_VGA(_MASK) +}; + +#include "../generic_regs.h" + +/* set field name */ +#define SF_GENERIC(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define generic_regs(id) \ +{\ + GENERIC_REG_LIST(id)\ +} + +static const struct generic_registers generic_regs[] = { + generic_regs(A), + 
generic_regs(B), +}; + +static const struct generic_sh_mask generic_shift[] = { + GENERIC_MASK_SH_LIST(__SHIFT, A), + GENERIC_MASK_SH_LIST(__SHIFT, B), +}; + +static const struct generic_sh_mask generic_mask[] = { + GENERIC_MASK_SH_LIST(_MASK, A), + GENERIC_MASK_SH_LIST(_MASK, B), +}; + +static void define_generic_registers(struct hw_gpio_pin *pin, uint32_t en) +{ + struct hw_generic *generic = HW_GENERIC_FROM_BASE(pin); + + generic->regs = &generic_regs[en]; + generic->shifts = &generic_shift[en]; + generic->masks = &generic_mask[en]; + generic->base.regs = &generic_regs[en].gpio; +} + +static void define_ddc_registers( + struct hw_gpio_pin *pin, + uint32_t en) +{ + struct hw_ddc *ddc = HW_DDC_FROM_BASE(pin); + + switch (pin->id) { + case GPIO_ID_DDC_DATA: + ddc->regs = &ddc_data_regs_dcn[en]; + ddc->base.regs = &ddc_data_regs_dcn[en].gpio; + break; + case GPIO_ID_DDC_CLOCK: + ddc->regs = &ddc_clk_regs_dcn[en]; + ddc->base.regs = &ddc_clk_regs_dcn[en].gpio; + break; + default: + ASSERT_CRITICAL(false); + return; + } + + ddc->shifts = &ddc_shift[en]; + ddc->masks = &ddc_mask[en]; + +} + +static void define_hpd_registers(struct hw_gpio_pin *pin, uint32_t en) +{ + struct hw_hpd *hpd = HW_HPD_FROM_BASE(pin); + + hpd->regs = &hpd_regs[en]; + hpd->shifts = &hpd_shift; + hpd->masks = &hpd_mask; + hpd->base.regs = &hpd_regs[en].gpio; +} + + +/* function table */ +static const struct hw_factory_funcs funcs = { + .init_ddc_data = dal_hw_ddc_init, + .init_generic = dal_hw_generic_init, + .init_hpd = dal_hw_hpd_init, + .get_ddc_pin = dal_hw_ddc_get_pin, + .get_hpd_pin = dal_hw_hpd_get_pin, + .get_generic_pin = dal_hw_generic_get_pin, + .define_hpd_registers = define_hpd_registers, + .define_ddc_registers = define_ddc_registers, + .define_generic_registers = define_generic_registers +}; + +/* + * dal_hw_factory_dcn401_init + * + * @brief + * Initialize HW factory function pointers and pin info + * + * @param + * struct hw_factory *factory - [out] struct of function pointers + */ +void dal_hw_factory_dcn401_init(struct hw_factory *factory) +{ + factory->number_of_pins[GPIO_ID_DDC_DATA] = 8; + factory->number_of_pins[GPIO_ID_DDC_CLOCK] = 8; + factory->number_of_pins[GPIO_ID_GENERIC] = 4; + factory->number_of_pins[GPIO_ID_HPD] = 5; + factory->number_of_pins[GPIO_ID_GPIO_PAD] = 28; + factory->number_of_pins[GPIO_ID_VIP_PAD] = 0; + factory->number_of_pins[GPIO_ID_SYNC] = 0; + factory->number_of_pins[GPIO_ID_GSL] = 0;/*add this*/ + + factory->funcs = &funcs; +} diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.h b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.h new file mode 100644 index 000000000000..22e650723ee7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DAL_HW_FACTORY_DCN401_H__ +#define __DAL_HW_FACTORY_DCN401_H__ + +/* Initialize HW factory function pointers and pin info */ +void dal_hw_factory_dcn401_init(struct hw_factory *factory); + +#endif /* __DAL_HW_FACTORY_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.c new file mode 100644 index 000000000000..ea416f01f888 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
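Illustrative aside, not part of the patch: dal_hw_factory_dcn401_init() above (and dal_hw_translate_dcn401_init() below) follow the same pattern used across DC: each ASIC version exposes one init function that fills in pin counts and points a generic object at a static const table of function pointers. A standalone sketch of that pattern with invented names; the HPD pin count mirrors the GPIO_ID_HPD value set in the factory init above.

/* Standalone model of the "static const ops table" pattern. */
#include <stdio.h>

struct pin_factory;

struct pin_factory_funcs {
	void (*define_hpd)(struct pin_factory *pf, unsigned int en);
};

struct pin_factory {
	const struct pin_factory_funcs *funcs;
	unsigned int number_of_hpd_pins;
};

static void dcn401_define_hpd(struct pin_factory *pf, unsigned int en)
{
	printf("programming HPD pin %u of %u\n", en, pf->number_of_hpd_pins);
}

static const struct pin_factory_funcs dcn401_funcs = {
	.define_hpd = dcn401_define_hpd,
};

/* analogue of dal_hw_factory_dcn401_init() */
static void pin_factory_dcn401_init(struct pin_factory *pf)
{
	pf->number_of_hpd_pins = 5;	/* DCN 4.0.1 exposes 5 HPD pins */
	pf->funcs = &dcn401_funcs;
}

int main(void)
{
	struct pin_factory pf;

	pin_factory_dcn401_init(&pf);
	pf.funcs->define_hpd(&pf, 0);	/* dispatch through the table */
	return 0;
}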
+ +#include "hw_translate_dcn401.h" + +#include "dm_services.h" +#include "include/gpio_types.h" +#include "../hw_translate.h" + +#include "dcn/dcn_4_1_0_offset.h" +#include "dcn/dcn_4_1_0_sh_mask.h" + +#define DCN_BASE__INST0_SEG2 0x000034C0 +/* begin ********************* + * macros to expend register list macro defined in HW object header file */ + +/* DCN */ +#define block HPD +#define reg_num 0 + +#undef BASE_INNER +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#undef REG +#define REG(reg_name)\ + BASE(reg ## reg_name ## _BASE_IDX) + reg ## reg_name +#define SF_HPD(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + + +/* macros to expend register list macro defined in HW object header file + * end *********************/ + + +static bool offset_to_id( + uint32_t offset, + uint32_t mask, + enum gpio_id *id, + uint32_t *en) +{ + switch (offset) { + /* GENERIC */ + case REG(DC_GPIO_GENERIC_A): + *id = GPIO_ID_GENERIC; + switch (mask) { + case DC_GPIO_GENERIC_A__DC_GPIO_GENERICA_A_MASK: + *en = GPIO_GENERIC_A; + return true; + case DC_GPIO_GENERIC_A__DC_GPIO_GENERICB_A_MASK: + *en = GPIO_GENERIC_B; + return true; + case DC_GPIO_GENERIC_A__DC_GPIO_GENERICC_A_MASK: + *en = GPIO_GENERIC_C; + return true; + case DC_GPIO_GENERIC_A__DC_GPIO_GENERICD_A_MASK: + *en = GPIO_GENERIC_D; + return true; + case DC_GPIO_GENERIC_A__DC_GPIO_GENERICE_A_MASK: + *en = GPIO_GENERIC_E; + return true; + case DC_GPIO_GENERIC_A__DC_GPIO_GENERICF_A_MASK: + *en = GPIO_GENERIC_F; + return true; + default: + ASSERT_CRITICAL(false); + return false; + } + break; + /* HPD */ + case REG(DC_GPIO_HPD_A): + *id = GPIO_ID_HPD; + switch (mask) { + case DC_GPIO_HPD_A__DC_GPIO_HPD1_A_MASK: + *en = GPIO_HPD_1; + return true; + case DC_GPIO_HPD_A__DC_GPIO_HPD2_A_MASK: + *en = GPIO_HPD_2; + return true; + case DC_GPIO_HPD_A__DC_GPIO_HPD3_A_MASK: + *en = GPIO_HPD_3; + return true; + case DC_GPIO_HPD_A__DC_GPIO_HPD4_A_MASK: + *en = GPIO_HPD_4; + return true; + case DC_GPIO_HPD_A__DC_GPIO_HPD5_A_MASK: + *en = GPIO_HPD_5; + return true; + default: + ASSERT_CRITICAL(false); + return false; + } + break; + /* REG(DC_GPIO_GENLK_MASK */ + case REG(DC_GPIO_GENLK_A): + *id = GPIO_ID_GSL; + switch (mask) { + case DC_GPIO_GENLK_A__DC_GPIO_GENLK_CLK_A_MASK: + *en = GPIO_GSL_GENLOCK_CLOCK; + return true; + case DC_GPIO_GENLK_A__DC_GPIO_GENLK_VSYNC_A_MASK: + *en = GPIO_GSL_GENLOCK_VSYNC; + return true; + case DC_GPIO_GENLK_A__DC_GPIO_SWAPLOCK_A_A_MASK: + *en = GPIO_GSL_SWAPLOCK_A; + return true; + case DC_GPIO_GENLK_A__DC_GPIO_SWAPLOCK_B_A_MASK: + *en = GPIO_GSL_SWAPLOCK_B; + return true; + default: + ASSERT_CRITICAL(false); + return false; + } + break; + /* DDC */ + /* we don't care about the GPIO_ID for DDC + * in DdcHandle it will use GPIO_ID_DDC_DATA/GPIO_ID_DDC_CLOCK + * directly in the create method + */ + case REG(DC_GPIO_DDC1_A): + *en = GPIO_DDC_LINE_DDC1; + return true; + case REG(DC_GPIO_DDC2_A): + *en = GPIO_DDC_LINE_DDC2; + return true; + case REG(DC_GPIO_DDC3_A): + *en = GPIO_DDC_LINE_DDC3; + return true; + case REG(DC_GPIO_DDC4_A): + *en = GPIO_DDC_LINE_DDC4; + return true; + case REG(DC_GPIO_DDCVGA_A): + *en = GPIO_DDC_LINE_DDC_VGA; + return true; + +/* + * case REG(DC_GPIO_I2CPAD_A): not exit + * case REG(DC_GPIO_PWRSEQ_A): + * case REG(DC_GPIO_PAD_STRENGTH_1): + * case REG(DC_GPIO_PAD_STRENGTH_2): + * case REG(DC_GPIO_DEBUG): + */ + /* UNEXPECTED */ + default: +/* case REG(DC_GPIO_SYNCA_A): not exist */ + ASSERT_CRITICAL(false); + return false; + 
} +} + + +static bool id_to_offset( + enum gpio_id id, + uint32_t en, + struct gpio_pin_info *info) +{ + bool result = true; + + switch (id) { + case GPIO_ID_DDC_DATA: + info->mask = DC_GPIO_DDC1_A__DC_GPIO_DDC1DATA_A_MASK; + switch (en) { + case GPIO_DDC_LINE_DDC1: + info->offset = REG(DC_GPIO_DDC1_A); + break; + case GPIO_DDC_LINE_DDC2: + info->offset = REG(DC_GPIO_DDC2_A); + break; + case GPIO_DDC_LINE_DDC3: + info->offset = REG(DC_GPIO_DDC3_A); + break; + case GPIO_DDC_LINE_DDC4: + info->offset = REG(DC_GPIO_DDC4_A); + break; +/* case GPIO_DDC_LINE_DDC5: + info->offset = REG(DC_GPIO_DDC5_A); + break; */ + case GPIO_DDC_LINE_DDC_VGA: + info->offset = REG(DC_GPIO_DDCVGA_A); + break; + case GPIO_DDC_LINE_I2C_PAD: + default: + ASSERT_CRITICAL(false); + result = false; + } + break; + case GPIO_ID_DDC_CLOCK: + info->mask = DC_GPIO_DDC1_A__DC_GPIO_DDC1CLK_A_MASK; + switch (en) { + case GPIO_DDC_LINE_DDC1: + info->offset = REG(DC_GPIO_DDC1_A); + break; + case GPIO_DDC_LINE_DDC2: + info->offset = REG(DC_GPIO_DDC2_A); + break; + case GPIO_DDC_LINE_DDC3: + info->offset = REG(DC_GPIO_DDC3_A); + break; + case GPIO_DDC_LINE_DDC4: + info->offset = REG(DC_GPIO_DDC4_A); + break; +/* case GPIO_DDC_LINE_DDC5: + info->offset = REG(DC_GPIO_DDC5_A); + break; */ + case GPIO_DDC_LINE_DDC_VGA: + info->offset = REG(DC_GPIO_DDCVGA_A); + break; + case GPIO_DDC_LINE_I2C_PAD: + default: + ASSERT_CRITICAL(false); + result = false; + } + break; + case GPIO_ID_GENERIC: + info->offset = REG(DC_GPIO_GENERIC_A); + switch (en) { + case GPIO_GENERIC_A: + info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICA_A_MASK; + break; + case GPIO_GENERIC_B: + info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICB_A_MASK; + break; + case GPIO_GENERIC_C: + info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICC_A_MASK; + break; + case GPIO_GENERIC_D: + info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICD_A_MASK; + break; + case GPIO_GENERIC_E: + info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICE_A_MASK; + break; + case GPIO_GENERIC_F: + info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICF_A_MASK; + break; + default: + ASSERT_CRITICAL(false); + result = false; + } + break; + case GPIO_ID_HPD: + info->offset = REG(DC_GPIO_HPD_A); + switch (en) { + case GPIO_HPD_1: + info->mask = DC_GPIO_HPD_A__DC_GPIO_HPD1_A_MASK; + break; + case GPIO_HPD_2: + info->mask = DC_GPIO_HPD_A__DC_GPIO_HPD2_A_MASK; + break; + case GPIO_HPD_3: + info->mask = DC_GPIO_HPD_A__DC_GPIO_HPD3_A_MASK; + break; + case GPIO_HPD_4: + info->mask = DC_GPIO_HPD_A__DC_GPIO_HPD4_A_MASK; + break; + case GPIO_HPD_5: + info->mask = DC_GPIO_HPD_A__DC_GPIO_HPD5_A_MASK; + break; + default: + ASSERT_CRITICAL(false); + result = false; + } + break; + case GPIO_ID_GSL: + switch (en) { + case GPIO_GSL_GENLOCK_CLOCK: + /*not implmented*/ + ASSERT_CRITICAL(false); + result = false; + break; + case GPIO_GSL_GENLOCK_VSYNC: + /*not implmented*/ + ASSERT_CRITICAL(false); + result = false; + break; + case GPIO_GSL_SWAPLOCK_A: + /*not implmented*/ + ASSERT_CRITICAL(false); + result = false; + break; + case GPIO_GSL_SWAPLOCK_B: + /*not implmented*/ + ASSERT_CRITICAL(false); + result = false; + + break; + default: + ASSERT_CRITICAL(false); + result = false; + } + break; + case GPIO_ID_SYNC: + case GPIO_ID_VIP_PAD: + default: + ASSERT_CRITICAL(false); + result = false; + } + + if (result) { + info->offset_y = info->offset + 2; + info->offset_en = info->offset + 1; + info->offset_mask = info->offset - 1; + + info->mask_y = info->mask; + info->mask_en = info->mask; + info->mask_mask = info->mask; + } + + return result; +} + + 
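Illustrative aside, not part of the patch: offset_to_id() and id_to_offset() above are intended to be inverses, so a pin can be resolved starting from either a register offset/mask pair or a (gpio_id, en) pair, and the companion EN/Y/MASK registers are derived at fixed distances from the "_A" register (+1, +2, -1, as in the tail of id_to_offset()). A toy standalone model of that round trip with invented offsets and one-bit-per-pin masks.

/* Toy model of the offset<->id round trip. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

enum gpio_kind { KIND_HPD, KIND_GENERIC };

/* hypothetical "_A" register offsets for two banks */
#define HPD_A_OFFSET		0x100
#define GENERIC_A_OFFSET	0x200

static bool toy_id_to_offset(enum gpio_kind id, unsigned int en,
			     uint32_t *offset, uint32_t *mask)
{
	switch (id) {
	case KIND_HPD:
		*offset = HPD_A_OFFSET;
		break;
	case KIND_GENERIC:
		*offset = GENERIC_A_OFFSET;
		break;
	default:
		return false;
	}
	*mask = 1u << en;	/* one bit per pin in the bank */
	return true;
}

static bool toy_offset_to_id(uint32_t offset, uint32_t mask,
			     enum gpio_kind *id, unsigned int *en)
{
	switch (offset) {
	case HPD_A_OFFSET:
		*id = KIND_HPD;
		break;
	case GENERIC_A_OFFSET:
		*id = KIND_GENERIC;
		break;
	default:
		return false;
	}
	for (*en = 0; !(mask & (1u << *en)); (*en)++)
		;	/* recover the pin index from its mask bit */
	return true;
}

int main(void)
{
	uint32_t offset, mask;
	enum gpio_kind id;
	unsigned int en;

	assert(toy_id_to_offset(KIND_HPD, 3, &offset, &mask));
	assert(toy_offset_to_id(offset, mask, &id, &en));
	assert(id == KIND_HPD && en == 3);

	/* companion registers derived from the "_A" offset */
	assert(offset + 1 == 0x101);	/* EN */
	assert(offset + 2 == 0x102);	/* Y */
	assert(offset - 1 == 0x0ff);	/* MASK */
	return 0;
}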
+/* function table */ +static const struct hw_translate_funcs funcs = { + .offset_to_id = offset_to_id, + .id_to_offset = id_to_offset, +}; + + +/* + * dal_hw_translate_dcn401_init + * + * @brief + * Initialize Hw translate function pointers. + * + * @param + * struct hw_translate *tr - [out] struct of function pointers + * + */ +void dal_hw_translate_dcn401_init(struct hw_translate *tr) +{ + tr->funcs = &funcs; +} + diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.h b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.h new file mode 100644 index 000000000000..aadecb05bba1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DAL_HW_TRANSLATE_DCN401_H__ +#define __DAL_HW_TRANSLATE_DCN401_H__ + +struct hw_translate; + +/* Initialize Hw translate function pointers */ +void dal_hw_translate_dcn401_init(struct hw_translate *tr); + +#endif /* __DAL_HW_TRANSLATE_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c new file mode 100644 index 000000000000..8ce4f46b302b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -0,0 +1,1545 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dm_services.h" +#include "dm_helpers.h" +#include "core_types.h" +#include "resource.h" +#include "dccg.h" +#include "dce/dce_hwseq.h" +#include "reg_helper.h" +#include "abm.h" +#include "hubp.h" +#include "dchubbub.h" +#include "timing_generator.h" +#include "opp.h" +#include "ipp.h" +#include "mpc.h" +#include "mcif_wb.h" +#include "dc_dmub_srv.h" +#include "link_hwss.h" +#include "dpcd_defs.h" +#include "clk_mgr.h" +#include "dsc.h" +#include "link.h" + +#include "dce/dmub_hw_lock_mgr.h" +#include "dcn10/dcn10_cm_common.h" +#include "dcn20/dcn20_optc.h" +#include "dcn30/dcn30_cm_common.h" +#include "dcn32/dcn32_hwseq.h" +#include "dcn401_hwseq.h" +#include "dcn401/dcn401_resource.h" +#include "dc_state_priv.h" + +#define DC_LOGGER_INIT(logger) + +#define CTX \ + hws->ctx +#define REG(reg)\ + hws->regs->reg +#define DC_LOGGER \ + dc->ctx->logger + + +#undef FN +#define FN(reg_name, field_name) \ + hws->shifts->field_name, hws->masks->field_name + +static void dcn401_initialize_min_clocks(struct dc *dc) +{ + struct dc_clocks *clocks = &dc->current_state->bw_ctx.bw.dcn.clk; + + clocks->dcfclk_deep_sleep_khz = DCN3_2_DCFCLK_DS_INIT_KHZ; + clocks->dcfclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz * 1000; + clocks->socclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].socclk_mhz * 1000; + clocks->dramclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].memclk_mhz * 1000; + clocks->dppclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dppclk_mhz * 1000; + clocks->dispclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz * 1000; + clocks->ref_dtbclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; + clocks->fclk_p_state_change_support = true; + clocks->p_state_change_support = true; + + dc->clk_mgr->funcs->update_clocks( + dc->clk_mgr, + dc->current_state, + true); +} + +void dcn401_program_gamut_remap(struct pipe_ctx *pipe_ctx) +{ + unsigned int i = 0; + struct mpc_grph_gamut_adjustment mpc_adjust; + unsigned int mpcc_id = pipe_ctx->plane_res.mpcc_inst; + struct mpc *mpc = 
pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; + + //For now assert if location is not pre-blend + if (pipe_ctx->plane_state) + ASSERT(pipe_ctx->plane_state->mcm_location == MPCC_MOVABLE_CM_LOCATION_BEFORE); + + // program MPCC_MCM_FIRST_GAMUT_REMAP + memset(&mpc_adjust, 0, sizeof(mpc_adjust)); + mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; + mpc_adjust.mpcc_gamut_remap_block_id = MPCC_MCM_FIRST_GAMUT_REMAP; + + if (pipe_ctx->plane_state && + pipe_ctx->plane_state->gamut_remap_matrix.enable_remap == true) { + mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; + for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) + mpc_adjust.temperature_matrix[i] = + pipe_ctx->plane_state->gamut_remap_matrix.matrix[i]; + } + + mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust); + + // program MPCC_MCM_SECOND_GAMUT_REMAP for Bypass / Disable for now + mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; + mpc_adjust.mpcc_gamut_remap_block_id = MPCC_MCM_SECOND_GAMUT_REMAP; + + mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust); + + // program MPCC_OGAM_GAMUT_REMAP same as is currently used on DCN3x + memset(&mpc_adjust, 0, sizeof(mpc_adjust)); + mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; + mpc_adjust.mpcc_gamut_remap_block_id = MPCC_OGAM_GAMUT_REMAP; + + if (pipe_ctx->top_pipe == NULL) { + if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) { + mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; + for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) + mpc_adjust.temperature_matrix[i] = + pipe_ctx->stream->gamut_remap_matrix.matrix[i]; + } + } + + mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust); +} + +struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc, uint8_t region) +{ + struct dce_hwseq *hws = dc->hwseq; + struct ips_ono_region_state state = {0, 0}; + + switch (region) { + case 0: + /* dccg, dio, dcio */ + REG_GET_2(DOMAIN22_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + case 1: + /* dchubbub, dchvm, dchubbubmem */ + REG_GET_2(DOMAIN23_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + case 2: + /* mpc, opp, optc, dwb */ + REG_GET_2(DOMAIN24_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + case 3: + /* hpo */ + REG_GET_2(DOMAIN25_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + case 4: + /* dchubp0, dpp0 */ + REG_GET_2(DOMAIN0_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + case 5: + /* dsc0 */ + REG_GET_2(DOMAIN16_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + case 6: + /* dchubp1, dpp1 */ + REG_GET_2(DOMAIN1_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + case 7: + /* dsc1 */ + REG_GET_2(DOMAIN17_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + case 8: + /* dchubp2, dpp2 */ + REG_GET_2(DOMAIN2_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + case 9: + /* dsc2 */ + 
REG_GET_2(DOMAIN18_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + case 10: + /* dchubp3, dpp3 */ + REG_GET_2(DOMAIN3_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + case 11: + /* dsc3 */ + REG_GET_2(DOMAIN19_PG_STATUS, + DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state, + DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state); + break; + default: + break; + } + + return state; +} + +void dcn401_init_hw(struct dc *dc) +{ + struct abm **abms = dc->res_pool->multiple_abms; + struct dce_hwseq *hws = dc->hwseq; + struct dc_bios *dcb = dc->ctx->dc_bios; + struct resource_pool *res_pool = dc->res_pool; + int i; + int edp_num; + uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; + + if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) + dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); + + // Initialize the dccg + if (res_pool->dccg->funcs->dccg_init) + res_pool->dccg->funcs->dccg_init(res_pool->dccg); + + // Disable DMUB Initialization until IPS state programming is finalized + //if (!dcb->funcs->is_accelerated_mode(dcb)) { + // hws->funcs.bios_golden_init(dc); + //} + + // Set default OPTC memory power states + if (dc->debug.enable_mem_low_power.bits.optc) { + // Shutdown when unassigned and light sleep in VBLANK + REG_SET_2(ODM_MEM_PWR_CTRL3, 0, ODM_MEM_UNASSIGNED_PWR_MODE, 3, ODM_MEM_VBLANK_PWR_MODE, 1); + } + + if (dc->debug.enable_mem_low_power.bits.vga) { + // Power down VGA memory + REG_UPDATE(MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, 1); + } + + if (dc->ctx->dc_bios->fw_info_valid) { + res_pool->ref_clocks.xtalin_clock_inKhz = + dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency; + + if (res_pool->dccg && res_pool->hubbub) { + (res_pool->dccg->funcs->get_dccg_ref_freq)(res_pool->dccg, + dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency, + &res_pool->ref_clocks.dccg_ref_clock_inKhz); + + (res_pool->hubbub->funcs->get_dchub_ref_freq)(res_pool->hubbub, + res_pool->ref_clocks.dccg_ref_clock_inKhz, + &res_pool->ref_clocks.dchub_ref_clock_inKhz); + } else { + // Not all ASICs have DCCG sw component + res_pool->ref_clocks.dccg_ref_clock_inKhz = + res_pool->ref_clocks.xtalin_clock_inKhz; + res_pool->ref_clocks.dchub_ref_clock_inKhz = + res_pool->ref_clocks.xtalin_clock_inKhz; + } + } else + ASSERT_CRITICAL(false); + + for (i = 0; i < dc->link_count; i++) { + /* Power up AND update implementation according to the + * required signal (which may be different from the + * default signal on connector). 
+ */ + struct dc_link *link = dc->links[i]; + + link->link_enc->funcs->hw_init(link->link_enc); + + /* Check for enabled DIG to identify enabled display */ + if (link->link_enc->funcs->is_dig_enabled && + link->link_enc->funcs->is_dig_enabled(link->link_enc)) { + link->link_status.link_active = true; + link->phy_state.symclk_state = SYMCLK_ON_TX_ON; + if (link->link_enc->funcs->fec_is_active && + link->link_enc->funcs->fec_is_active(link->link_enc)) + link->fec_state = dc_link_fec_enabled; + } + } + + /* enable_power_gating_plane before dsc_pg_control because + * FORCEON = 1 with hw default value on bootup, resume from s3 + */ + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + + /* we want to turn off all dp displays before doing detection */ + dc->link_srv->blank_all_dp_displays(dc); + + /* If taking control over from VBIOS, we may want to optimize our first + * mode set, so we need to skip powering down pipes until we know which + * pipes we want to use. + * Otherwise, if taking control is not possible, we need to power + * everything down. + */ + if (dcb->funcs->is_accelerated_mode(dcb) || !dc->config.seamless_boot_edp_requested) { + hws->funcs.init_pipes(dc, dc->current_state); + if (dc->res_pool->hubbub->funcs->allow_self_refresh_control) + dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, + !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); + + dcn401_initialize_min_clocks(dc); + + /* On HW init, allow idle optimizations after pipes have been turned off. + * + * In certain D3 cases (i.e. BOCO / BOMACO) it's possible that hardware state + * is reset (i.e. not in idle at the time hw init is called), but software state + * still has idle_optimizations = true, so we must disable idle optimizations first + * (i.e. set false), then re-enable (set true). + */ + dc_allow_idle_optimizations(dc, false); + dc_allow_idle_optimizations(dc, true); + } + + /* In headless boot cases, DIG may be turned + * on which causes HW/SW discrepancies. 
+ * To avoid this, power down hardware on boot + * if DIG is turned on and seamless boot not enabled + */ + if (!dc->config.seamless_boot_edp_requested) { + struct dc_link *edp_links[MAX_NUM_EDP]; + struct dc_link *edp_link; + + dc_get_edp_links(dc, edp_links, &edp_num); + if (edp_num) { + for (i = 0; i < edp_num; i++) { + edp_link = edp_links[i]; + if (edp_link->link_enc->funcs->is_dig_enabled && + edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && + dc->hwss.edp_backlight_control && + dc->hwss.power_down && + dc->hwss.edp_power_control) { + dc->hwss.edp_backlight_control(edp_link, false); + dc->hwss.power_down(dc); + dc->hwss.edp_power_control(edp_link, false); + } + } + } else { + for (i = 0; i < dc->link_count; i++) { + struct dc_link *link = dc->links[i]; + + if (link->link_enc->funcs->is_dig_enabled && + link->link_enc->funcs->is_dig_enabled(link->link_enc) && + dc->hwss.power_down) { + dc->hwss.power_down(dc); + break; + } + + } + } + } + + for (i = 0; i < res_pool->audio_count; i++) { + struct audio *audio = res_pool->audios[i]; + + audio->funcs->hw_init(audio); + } + + for (i = 0; i < dc->link_count; i++) { + struct dc_link *link = dc->links[i]; + + if (link->panel_cntl) { + backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (abms[i] != NULL && abms[i]->funcs != NULL) + abms[i]->funcs->abm_init(abms[i], backlight, user_level); + } + + /* power AFMT HDMI memory TODO: may move to dis/en output save power*/ + REG_WRITE(DIO_MEM_PWR_CTRL, 0); + + if (!dc->debug.disable_clock_gate) { + /* enable all DCN clock gating */ + REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); + + REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); + + REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); + } + + if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) + dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); + + if (dc->clk_mgr->funcs->notify_wm_ranges) + dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); + + if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); + + if (dc->res_pool->hubbub->funcs->force_pstate_change_control) + dc->res_pool->hubbub->funcs->force_pstate_change_control( + dc->res_pool->hubbub, false, false); + + if (dc->res_pool->hubbub->funcs->init_crb) + dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub); + + if (dc->res_pool->hubbub->funcs->set_request_limit && dc->config.sdpif_request_limit_words_per_umc > 0) + dc->res_pool->hubbub->funcs->set_request_limit(dc->res_pool->hubbub, dc->ctx->dc_bios->vram_info.num_chans, dc->config.sdpif_request_limit_words_per_umc); + + // Get DMCUB capabilities + if (dc->ctx->dmub_srv) { + dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv); + dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr; + dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver > 0; + dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; + dc->debug.fams2_config.bits.enable &= dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver == 2; + if (!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) { + /* update bounding box if FAMS2 disabled */ + dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); + } + } +} + +static void 
dcn401_get_mcm_lut_xable_from_pipe_ctx(struct dc *dc, struct pipe_ctx *pipe_ctx, + enum MCM_LUT_XABLE *shaper_xable, + enum MCM_LUT_XABLE *lut3d_xable, + enum MCM_LUT_XABLE *lut1d_xable) +{ + enum dc_cm2_shaper_3dlut_setting shaper_3dlut_setting = DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL; + bool lut1d_enable = false; + struct mpc *mpc = dc->res_pool->mpc; + int mpcc_id = pipe_ctx->plane_res.hubp->inst; + + if (!pipe_ctx->plane_state) + return; + shaper_3dlut_setting = pipe_ctx->plane_state->mcm_shaper_3dlut_setting; + lut1d_enable = pipe_ctx->plane_state->mcm_lut1d_enable; + mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); + pipe_ctx->plane_state->mcm_location = MPCC_MOVABLE_CM_LOCATION_BEFORE; + + *lut1d_xable = lut1d_enable ? MCM_LUT_ENABLE : MCM_LUT_DISABLE; + + switch (shaper_3dlut_setting) { + case DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL: + *lut3d_xable = *shaper_xable = MCM_LUT_DISABLE; + break; + case DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER: + *lut3d_xable = MCM_LUT_DISABLE; + *shaper_xable = MCM_LUT_ENABLE; + break; + case DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER_3DLUT: + *lut3d_xable = *shaper_xable = MCM_LUT_ENABLE; + break; + } +} + +void dcn401_populate_mcm_luts(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_cm2_func_luts mcm_luts, + bool lut_bank_a) +{ + struct dpp *dpp_base = pipe_ctx->plane_res.dpp; + struct hubp *hubp = pipe_ctx->plane_res.hubp; + int mpcc_id = hubp->inst; + struct mpc *mpc = dc->res_pool->mpc; + union mcm_lut_params m_lut_params; + enum dc_cm2_transfer_func_source lut3d_src = mcm_luts.lut3d_data.lut3d_src; + enum hubp_3dlut_fl_format format; + enum hubp_3dlut_fl_mode mode; + enum hubp_3dlut_fl_width width; + enum hubp_3dlut_fl_addressing_mode addr_mode; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cr_r; + enum MCM_LUT_XABLE shaper_xable, lut3d_xable, lut1d_xable; + + dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); + + /* 1D LUT */ + if (mcm_luts.lut1d_func) { + memset(&m_lut_params, 0, sizeof(m_lut_params)); + if (mcm_luts.lut1d_func->type == TF_TYPE_HWPWL) + m_lut_params.pwl = &mcm_luts.lut1d_func->pwl; + else if (mcm_luts.lut1d_func->type == TF_TYPE_DISTRIBUTED_POINTS) { + cm_helper_translate_curve_to_hw_format( + dc->ctx, + mcm_luts.lut1d_func, + &dpp_base->regamma_params, false); + m_lut_params.pwl = &dpp_base->regamma_params; + } + if (m_lut_params.pwl) { + if (mpc->funcs->populate_lut) + mpc->funcs->populate_lut(mpc, MCM_LUT_1DLUT, m_lut_params, lut_bank_a, mpcc_id); + if (mpc->funcs->program_lut_mode) + mpc->funcs->program_lut_mode(mpc, MCM_LUT_1DLUT, lut1d_xable, lut_bank_a, mpcc_id); + } + } + + /* Shaper */ + if (mcm_luts.shaper) { + memset(&m_lut_params, 0, sizeof(m_lut_params)); + if (mcm_luts.shaper->type == TF_TYPE_HWPWL) + m_lut_params.pwl = &mcm_luts.shaper->pwl; + else if (mcm_luts.shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { + ASSERT(false); + cm_helper_translate_curve_to_hw_format( + dc->ctx, + mcm_luts.shaper, + &dpp_base->regamma_params, true); + m_lut_params.pwl = &dpp_base->regamma_params; + } + if (m_lut_params.pwl) { + if (mpc->funcs->populate_lut) + mpc->funcs->populate_lut(mpc, MCM_LUT_SHAPER, m_lut_params, lut_bank_a, mpcc_id); + if (mpc->funcs->program_lut_mode) + mpc->funcs->program_lut_mode(mpc, MCM_LUT_SHAPER, shaper_xable, lut_bank_a, mpcc_id); + } + } + + /* 3DLUT */ + switch (lut3d_src) { 
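+	/* Descriptive note (grounded in the cases below): with a SYSMEM source the 3DLUT
+	 * contents are pushed through the MPC populate_lut path and the HUBP fast-load is
+	 * disabled; with a VIDMEM source the HUBP fast-load path streams the 3DLUT from GPU
+	 * memory, so address, layout, format, bias/scale, crossbar and width are programmed
+	 * on the HUBP instead. */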
+ case DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM: + memset(&m_lut_params, 0, sizeof(m_lut_params)); + if (hubp->funcs->hubp_enable_3dlut_fl) + hubp->funcs->hubp_enable_3dlut_fl(hubp, false); + if (mcm_luts.lut3d_data.lut3d_func && mcm_luts.lut3d_data.lut3d_func->state.bits.initialized) { + m_lut_params.lut3d = &mcm_luts.lut3d_data.lut3d_func->lut_3d; + if (mpc->funcs->populate_lut) + mpc->funcs->populate_lut(mpc, MCM_LUT_3DLUT, m_lut_params, lut_bank_a, mpcc_id); + if (mpc->funcs->program_lut_mode) + mpc->funcs->program_lut_mode(mpc, MCM_LUT_3DLUT, lut3d_xable, lut_bank_a, + mpcc_id); + } + break; + case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: + + if (mpc->funcs->program_lut_read_write_control) + mpc->funcs->program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, mpcc_id); + if (mpc->funcs->program_lut_mode) + mpc->funcs->program_lut_mode(mpc, MCM_LUT_3DLUT, lut3d_xable, lut_bank_a, mpcc_id); + if (hubp->funcs->hubp_program_3dlut_fl_addr) + hubp->funcs->hubp_program_3dlut_fl_addr(hubp, mcm_luts.lut3d_data.gpu_mem_params.addr); + switch (mcm_luts.lut3d_data.gpu_mem_params.layout) { + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: + mode = hubp_3dlut_fl_mode_native_1; + addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: + mode = hubp_3dlut_fl_mode_native_2; + addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR: + mode = hubp_3dlut_fl_mode_transform; + addr_mode = hubp_3dlut_fl_addressing_mode_simple_linear; + break; + default: + mode = hubp_3dlut_fl_mode_disable; + addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + } + if (hubp->funcs->hubp_program_3dlut_fl_mode) + hubp->funcs->hubp_program_3dlut_fl_mode(hubp, mode); + + if (hubp->funcs->hubp_program_3dlut_fl_addressing_mode) + hubp->funcs->hubp_program_3dlut_fl_addressing_mode(hubp, addr_mode); + + switch (mcm_luts.lut3d_data.gpu_mem_params.format_params.format) { + case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12MSB: + default: + format = hubp_3dlut_fl_format_unorm_12msb_bitslice; + break; + case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12LSB: + format = hubp_3dlut_fl_format_unorm_12lsb_bitslice; + break; + case DC_CM2_GPU_MEM_FORMAT_16161616_FLOAT_FP1_5_10: + format = hubp_3dlut_fl_format_float_fp1_5_10; + break; + } + if (hubp->funcs->hubp_program_3dlut_fl_format) + hubp->funcs->hubp_program_3dlut_fl_format(hubp, format); + if (hubp->funcs->hubp_update_3dlut_fl_bias_scale) + hubp->funcs->hubp_update_3dlut_fl_bias_scale(hubp, + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale); + + switch (mcm_luts.lut3d_data.gpu_mem_params.component_order) { + case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA: + default: + crossbar_bit_slice_cr_r = hubp_3dlut_fl_crossbar_bit_slice_0_15; + crossbar_bit_slice_y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; + crossbar_bit_slice_cb_b = hubp_3dlut_fl_crossbar_bit_slice_32_47; + break; + } + + if (hubp->funcs->hubp_program_3dlut_fl_crossbar) + hubp->funcs->hubp_program_3dlut_fl_crossbar(hubp, + crossbar_bit_slice_y_g, + crossbar_bit_slice_cb_b, + crossbar_bit_slice_cr_r); + + switch (mcm_luts.lut3d_data.gpu_mem_params.size) { + case DC_CM2_GPU_MEM_SIZE_171717: + default: + width = hubp_3dlut_fl_width_17; + break; + case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: + width = hubp_3dlut_fl_width_transformed; + break; + } + if (hubp->funcs->hubp_program_3dlut_fl_width) + hubp->funcs->hubp_program_3dlut_fl_width(hubp, 
width); + if (mpc->funcs->update_3dlut_fast_load_select) + mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, hubp->inst); + + if (hubp->funcs->hubp_enable_3dlut_fl) + hubp->funcs->hubp_enable_3dlut_fl(hubp, true); + else { + if (mpc->funcs->program_lut_mode) { + mpc->funcs->program_lut_mode(mpc, MCM_LUT_SHAPER, MCM_LUT_DISABLE, lut_bank_a, mpcc_id); + mpc->funcs->program_lut_mode(mpc, MCM_LUT_3DLUT, MCM_LUT_DISABLE, lut_bank_a, mpcc_id); + mpc->funcs->program_lut_mode(mpc, MCM_LUT_1DLUT, MCM_LUT_DISABLE, lut_bank_a, mpcc_id); + } + } + break; + + } +} + +bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, + const struct dc_plane_state *plane_state) +{ + struct dpp *dpp_base = pipe_ctx->plane_res.dpp; + int mpcc_id = pipe_ctx->plane_res.hubp->inst; + struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; + bool result = true; + const struct pwl_params *lut_params = NULL; + + mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); + pipe_ctx->plane_state->mcm_location = MPCC_MOVABLE_CM_LOCATION_BEFORE; + // 1D LUT + if (!plane_state->mcm_lut1d_enable) { + if (plane_state->blend_tf.type == TF_TYPE_HWPWL) + lut_params = &plane_state->blend_tf.pwl; + else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) { + cm_helper_translate_curve_to_hw_format(plane_state->ctx, + &plane_state->blend_tf, + &dpp_base->regamma_params, false); + lut_params = &dpp_base->regamma_params; + } + result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id); + lut_params = NULL; + } + + // Shaper + if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { + if (plane_state->in_shaper_func.type == TF_TYPE_HWPWL) + lut_params = &plane_state->in_shaper_func.pwl; + else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) { + // TODO: dpp_base replace + ASSERT(false); + cm_helper_translate_curve_to_hw_format(plane_state->ctx, + &plane_state->in_shaper_func, + &dpp_base->shaper_params, true); + lut_params = &dpp_base->shaper_params; + } + + result = mpc->funcs->program_shaper(mpc, lut_params, mpcc_id); + } + + // 3D + if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { + if (plane_state->lut3d_func.state.bits.initialized == 1) + result = mpc->funcs->program_3dlut(mpc, &plane_state->lut3d_func.lut_3d, mpcc_id); + else + result = mpc->funcs->program_3dlut(mpc, NULL, mpcc_id); + } + + return result; +} + +bool dcn401_set_output_transfer_func(struct dc *dc, + struct pipe_ctx *pipe_ctx, + const struct dc_stream_state *stream) +{ + int mpcc_id = pipe_ctx->plane_res.hubp->inst; + struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; + const struct pwl_params *params = NULL; + bool ret = false; + + /* program OGAM or 3DLUT only for the top pipe*/ + if (resource_is_pipe_type(pipe_ctx, OPP_HEAD)) { + /*program shaper and 3dlut in MPC*/ + ret = dcn32_set_mpc_shaper_3dlut(pipe_ctx, stream); + if (ret == false && mpc->funcs->set_output_gamma) { + if (stream->out_transfer_func.type == TF_TYPE_HWPWL) + params = &stream->out_transfer_func.pwl; + else if (pipe_ctx->stream->out_transfer_func.type == + TF_TYPE_DISTRIBUTED_POINTS && + cm3_helper_translate_curve_to_hw_format( + &stream->out_transfer_func, + &mpc->blender_params, false)) + params = &mpc->blender_params; + /* there are no ROM LUTs in OUTGAM */ + if (stream->out_transfer_func.type == TF_TYPE_PREDEFINED) + BREAK_TO_DEBUGGER(); + } + } + + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + return ret; +} + +void 
dcn401_calculate_dccg_tmds_div_value(struct pipe_ctx *pipe_ctx, + unsigned int *tmds_div) +{ + struct dc_stream_state *stream = pipe_ctx->stream; + + if (dc_is_tmds_signal(stream->signal) || dc_is_virtual_signal(stream->signal)) { + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + *tmds_div = PIXEL_RATE_DIV_BY_2; + else + *tmds_div = PIXEL_RATE_DIV_BY_4; + } else { + *tmds_div = PIXEL_RATE_DIV_BY_1; + } + + if (*tmds_div == PIXEL_RATE_DIV_NA) + ASSERT(false); + +} + +static void enable_stream_timing_calc( + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct dc *dc, + unsigned int *tmds_div, + int *opp_inst, + int *opp_cnt, + bool *manual_mode, + struct drr_params *params, + unsigned int *event_triggers) +{ + struct dc_stream_state *stream = pipe_ctx->stream; + struct pipe_ctx *odm_pipe; + + if (dc_is_tmds_signal(stream->signal) || dc_is_virtual_signal(stream->signal)) + dcn401_calculate_dccg_tmds_div_value(pipe_ctx, tmds_div); + + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { + opp_inst[*opp_cnt] = odm_pipe->stream_res.opp->inst; + (*opp_cnt)++; + } + + if (dc_is_tmds_signal(stream->signal)) { + stream->link->phy_state.symclk_ref_cnts.otg = 1; + if (stream->link->phy_state.symclk_state == SYMCLK_OFF_TX_OFF) + stream->link->phy_state.symclk_state = SYMCLK_ON_TX_OFF; + else + stream->link->phy_state.symclk_state = SYMCLK_ON_TX_ON; + } + + params->vertical_total_min = stream->adjust.v_total_min; + params->vertical_total_max = stream->adjust.v_total_max; + params->vertical_total_mid = stream->adjust.v_total_mid; + params->vertical_total_mid_frame_num = stream->adjust.v_total_mid_frame_num; + + // DRR should set trigger event to monitor surface update event + if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) + *event_triggers = 0x80; +} + +enum dc_status dcn401_enable_stream_timing( + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct dc *dc) +{ + struct dce_hwseq *hws = dc->hwseq; + struct dc_stream_state *stream = pipe_ctx->stream; + struct drr_params params = {0}; + unsigned int event_triggers = 0; + struct pipe_ctx *odm_pipe; + int opp_cnt = 1; + int opp_inst[MAX_PIPES] = { pipe_ctx->stream_res.opp->inst }; + bool manual_mode; + unsigned int tmds_div = PIXEL_RATE_DIV_NA; + unsigned int unused_div = PIXEL_RATE_DIV_NA; + + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) + return DC_OK; + + enable_stream_timing_calc(pipe_ctx, context, dc, &tmds_div, opp_inst, + &opp_cnt, &manual_mode, ¶ms, &event_triggers); + + if (dc->res_pool->dccg->funcs->set_pixel_rate_div) { + dc->res_pool->dccg->funcs->set_pixel_rate_div( + dc->res_pool->dccg, pipe_ctx->stream_res.tg->inst, + tmds_div, unused_div); + } + + /* TODO check if timing_changed, disable stream if timing changed */ + + if (opp_cnt > 1) + pipe_ctx->stream_res.tg->funcs->set_odm_combine( + pipe_ctx->stream_res.tg, + opp_inst, opp_cnt, + &pipe_ctx->stream->timing); + + /* HW program guide assume display already disable + * by unplug sequence. OTG assume stop. 
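+	 * OPTC clock is enabled first; the pixel clock, OTG timing and OPP pipe
+	 * clocks are then programmed before the CRTC is enabled further below.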
+ */ + pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, true); + + if (false == pipe_ctx->clock_source->funcs->program_pix_clk( + pipe_ctx->clock_source, + &pipe_ctx->stream_res.pix_clk_params, + dc->link_srv->dp_get_encoding_format(&pipe_ctx->link_config.dp_link_settings), + &pipe_ctx->pll_settings)) { + BREAK_TO_DEBUGGER(); + return DC_ERROR_UNEXPECTED; + } + + if (dc->hwseq->funcs.PLAT_58856_wa && (!dc_is_dp_signal(stream->signal))) + dc->hwseq->funcs.PLAT_58856_wa(context, pipe_ctx); + + pipe_ctx->stream_res.tg->funcs->program_timing( + pipe_ctx->stream_res.tg, + &stream->timing, + pipe_ctx->pipe_dlg_param.vready_offset, + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->stream->signal, + true); + + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) + odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( + odm_pipe->stream_res.opp, + true); + + pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control( + pipe_ctx->stream_res.opp, + true); + + hws->funcs.blank_pixel_data(dc, pipe_ctx, true); + + /* VTG is within DCHUB command block. DCFCLK is always on */ + if (false == pipe_ctx->stream_res.tg->funcs->enable_crtc(pipe_ctx->stream_res.tg)) { + BREAK_TO_DEBUGGER(); + return DC_ERROR_UNEXPECTED; + } + + hws->funcs.wait_for_blank_complete(pipe_ctx->stream_res.opp); + + if (pipe_ctx->stream_res.tg->funcs->set_drr) + pipe_ctx->stream_res.tg->funcs->set_drr( + pipe_ctx->stream_res.tg, ¶ms); + + /* Event triggers and num frames initialized for DRR, but can be + * later updated for PSR use. Note DRR trigger events are generated + * regardless of whether num frames met. + */ + if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control) + pipe_ctx->stream_res.tg->funcs->set_static_screen_control( + pipe_ctx->stream_res.tg, event_triggers, 2); + + /* TODO program crtc source select for non-virtual signal*/ + /* TODO program FMT */ + /* TODO setup link_enc */ + /* TODO set stream attributes */ + /* TODO program audio */ + /* TODO enable stream if timing changed */ + /* TODO unblank stream if DP */ + + if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { + if (pipe_ctx->stream_res.tg && pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable) + pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable(pipe_ctx->stream_res.tg); + } + + return DC_OK; +} + +static enum phyd32clk_clock_source get_phyd32clk_src(struct dc_link *link) +{ + switch (link->link_enc->transmitter) { + case TRANSMITTER_UNIPHY_A: + return PHYD32CLKA; + case TRANSMITTER_UNIPHY_B: + return PHYD32CLKB; + case TRANSMITTER_UNIPHY_C: + return PHYD32CLKC; + case TRANSMITTER_UNIPHY_D: + return PHYD32CLKD; + case TRANSMITTER_UNIPHY_E: + return PHYD32CLKE; + default: + return PHYD32CLKA; + } +} + +static void dcn401_enable_stream_calc( + struct pipe_ctx *pipe_ctx, + int *dp_hpo_inst, + enum phyd32clk_clock_source *phyd32clk, + unsigned int *tmds_div, + uint32_t *early_control) +{ + + struct dc *dc = pipe_ctx->stream->ctx->dc; + struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; + enum dc_lane_count lane_count = + pipe_ctx->stream->link->cur_link_settings.lane_count; + uint32_t active_total_with_borders; + + if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) + *dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; + + *phyd32clk = get_phyd32clk_src(pipe_ctx->stream->link); + + if (dc_is_tmds_signal(pipe_ctx->stream->signal)) + 
dcn401_calculate_dccg_tmds_div_value(pipe_ctx, tmds_div); + else + *tmds_div = PIXEL_RATE_DIV_BY_1; + + /* enable early control to avoid corruption on DP monitor*/ + active_total_with_borders = + timing->h_addressable + + timing->h_border_left + + timing->h_border_right; + + if (lane_count != 0) + *early_control = active_total_with_borders % lane_count; + + if (*early_control == 0) + *early_control = lane_count; + +} + +void dcn401_enable_stream(struct pipe_ctx *pipe_ctx) +{ + uint32_t early_control = 0; + struct timing_generator *tg = pipe_ctx->stream_res.tg; + struct dc_link *link = pipe_ctx->stream->link; + const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); + struct dc *dc = pipe_ctx->stream->ctx->dc; + struct dccg *dccg = dc->res_pool->dccg; + enum phyd32clk_clock_source phyd32clk; + int dp_hpo_inst; + unsigned int tmds_div = PIXEL_RATE_DIV_NA; + unsigned int unused_div = PIXEL_RATE_DIV_NA; + + dcn401_enable_stream_calc(pipe_ctx, &dp_hpo_inst, &phyd32clk, + &tmds_div, &early_control); + + if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) { + if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { + dccg->funcs->set_dpstreamclk(dccg, DPREFCLK, tg->inst, dp_hpo_inst); + + dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); + } else { + /* need to set DTBCLK_P source to DPREFCLK for DP8B10B */ + dccg->funcs->set_dtbclk_p_src(dccg, DPREFCLK, tg->inst); + } + } + + if (dc->res_pool->dccg->funcs->set_pixel_rate_div) { + dc->res_pool->dccg->funcs->set_pixel_rate_div( + dc->res_pool->dccg, + pipe_ctx->stream_res.tg->inst, + tmds_div, + unused_div); + } + + link_hwss->setup_stream_encoder(pipe_ctx); + + if (pipe_ctx->plane_state && pipe_ctx->plane_state->flip_immediate != 1) { + if (dc->hwss.program_dmdata_engine) + dc->hwss.program_dmdata_engine(pipe_ctx); + } + + dc->hwss.update_info_frame(pipe_ctx); + + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_UPDATE_INFO_FRAME); + + tg->funcs->set_early_control(tg, early_control); + + if (dc->hwseq->funcs.set_pixels_per_cycle) + dc->hwseq->funcs.set_pixels_per_cycle(pipe_ctx); +} + +void dcn401_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable) +{ + REG_UPDATE(HPO_TOP_HW_CONTROL, HPO_IO_EN, enable); +} + +static bool dcn401_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx) +{ + struct pipe_ctx *test_pipe, *split_pipe; + const struct scaler_data *scl_data = &pipe_ctx->plane_res.scl_data; + struct rect r1 = scl_data->recout, r2, r2_half; + int r1_r = r1.x + r1.width, r1_b = r1.y + r1.height, r2_r, r2_b; + int cur_layer = pipe_ctx->plane_state->layer_index; + + /** + * Disable the cursor if there's another pipe above this with a + * plane that contains this pipe's viewport to prevent double cursor + * and incorrect scaling artifacts. + */ + for (test_pipe = pipe_ctx->top_pipe; test_pipe; + test_pipe = test_pipe->top_pipe) { + // Skip invisible layer and pipe-split plane on same layer + if (!test_pipe->plane_state || + !test_pipe->plane_state->visible || + test_pipe->plane_state->layer_index == cur_layer) + continue; + + r2 = test_pipe->plane_res.scl_data.recout; + r2_r = r2.x + r2.width; + r2_b = r2.y + r2.height; + split_pipe = test_pipe; + + /** + * There is another half plane on same layer because of + * pipe-split, merge together per same height. 
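+	 * The half widths are summed so the containment check below compares this
+	 * pipe's recout against the full merged plane rectangle.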
+ */ + for (split_pipe = pipe_ctx->top_pipe; split_pipe; + split_pipe = split_pipe->top_pipe) + if (split_pipe->plane_state->layer_index == test_pipe->plane_state->layer_index) { + r2_half = split_pipe->plane_res.scl_data.recout; + r2.x = (r2_half.x < r2.x) ? r2_half.x : r2.x; + r2.width = r2.width + r2_half.width; + r2_r = r2.x + r2.width; + break; + } + + if (r1.x >= r2.x && r1.y >= r2.y && r1_r <= r2_r && r1_b <= r2_b) + return true; + } + + return false; +} + +void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx) +{ + struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position; + struct hubp *hubp = pipe_ctx->plane_res.hubp; + struct dpp *dpp = pipe_ctx->plane_res.dpp; + struct dc_cursor_mi_param param = { + .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10, + .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz, + .viewport = pipe_ctx->plane_res.scl_data.viewport, + .recout = pipe_ctx->plane_res.scl_data.recout, + .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz, + .v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert, + .rotation = pipe_ctx->plane_state->rotation, + .mirror = pipe_ctx->plane_state->horizontal_mirror + }; + bool pipe_split_on = false; + bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) || + (pipe_ctx->prev_odm_pipe != NULL); + int prev_odm_width = 0; + int next_odm_width = 0; + + int x_pos = pos_cpy.x; + int y_pos = pos_cpy.y; + + if ((pipe_ctx->top_pipe != NULL) || (pipe_ctx->bottom_pipe != NULL)) { + if ((pipe_ctx->plane_state->src_rect.width != pipe_ctx->plane_res.scl_data.viewport.width) || + (pipe_ctx->plane_state->src_rect.height != pipe_ctx->plane_res.scl_data.viewport.height)) { + pipe_split_on = true; + } + } + + /** + * DCN4 moved cursor composition after Scaler, so in HW it is in + * recout space and for HW Cursor position programming need to + * translate to recout space. + * + * Cursor X and Y position programmed into HW can't be negative, + * in fact it is X, Y coordinate shifted for the HW Cursor Hot spot + * position that goes into HW X and Y coordinates while HW Hot spot + * X and Y coordinates are length relative to the cursor top left + * corner, hotspot must be smaller than the cursor size. + * + * DMs/DC interface for Cursor position is in stream->src space, and + * DMs supposed to transform Cursor coordinates to stream->src space, + * then here we need to translate Cursor coordinates to stream->dst + * space, as now in HW, Cursor coordinates are in per pipe recout + * space, and for the given pipe valid coordinates are only in range + * from 0,0 - recout width, recout height space. + * If certain pipe combining is in place, need to further adjust per + * pipe to make sure each pipe enabling cursor on its part of the + * screen. + */ + + if (param.rotation == ROTATION_ANGLE_90 || param.rotation == ROTATION_ANGLE_270) { + x_pos = x_pos * pipe_ctx->stream->dst.width / + pipe_ctx->stream->src.height; + y_pos = y_pos * pipe_ctx->stream->dst.height / + pipe_ctx->stream->src.width; + } else { + x_pos = x_pos * pipe_ctx->stream->dst.width / + pipe_ctx->stream->src.width; + y_pos = y_pos * pipe_ctx->stream->dst.height / + pipe_ctx->stream->src.height; + } + + /** + * If the cursor's source viewport is clipped then we need to + * translate the cursor to appear in the correct position on + * the screen. + * + * This translation isn't affected by scaling so it needs to be + * done *after* we adjust the position for the scale factor. 
+ * + * This is only done by opt-in for now since there are still + * some usecases like tiled display that might enable the + * cursor on both streams while expecting dc to clip it. + */ + if (pos_cpy.translate_by_source) { + x_pos += pipe_ctx->plane_state->src_rect.x; + y_pos += pipe_ctx->plane_state->src_rect.y; + } + + /* Adjust for ODM Combine */ + if (odm_combine_on) { + struct pipe_ctx *next_odm_pipe = pipe_ctx->next_odm_pipe; + struct pipe_ctx *prev_odm_pipe = pipe_ctx->prev_odm_pipe; + + while (next_odm_pipe != NULL) { + next_odm_width += next_odm_pipe->plane_res.scl_data.recout.width; + next_odm_pipe = next_odm_pipe->next_odm_pipe; + } + while (prev_odm_pipe != NULL) { + prev_odm_width += prev_odm_pipe->plane_res.scl_data.recout.width; + prev_odm_pipe = prev_odm_pipe->prev_odm_pipe; + } + + if (param.rotation == ROTATION_ANGLE_0) { + x_pos -= prev_odm_width; + } + } + + /** + * If the position is negative then we need to add to the hotspot + * to shift the cursor outside the plane. + */ + + if (x_pos < 0) { + pos_cpy.x_hotspot -= x_pos; + x_pos = 0; + } + + if (y_pos < 0) { + pos_cpy.y_hotspot -= y_pos; + y_pos = 0; + } + + pos_cpy.x = (uint32_t)x_pos; + pos_cpy.y = (uint32_t)y_pos; + + if (pos_cpy.enable && dcn401_can_pipe_disable_cursor(pipe_ctx)) + pos_cpy.enable = false; + + if (param.rotation == ROTATION_ANGLE_0) { + int recout_width = + pipe_ctx->plane_res.scl_data.recout.width; + int recout_x = + pipe_ctx->plane_res.scl_data.recout.x; + + if (param.mirror) { + if (pipe_split_on || odm_combine_on) { + if (pos_cpy.x >= recout_width + recout_x) { + pos_cpy.x = 2 * recout_width + - pos_cpy.x + 2 * recout_x; + } else { + uint32_t temp_x = pos_cpy.x; + + pos_cpy.x = 2 * recout_x - pos_cpy.x; + if (temp_x >= recout_x + + (int)hubp->curs_attr.width || pos_cpy.x + <= (int)hubp->curs_attr.width + + pipe_ctx->plane_state->src_rect.x) { + pos_cpy.x = 2 * recout_width - temp_x; + } + } + } else { + pos_cpy.x = recout_width - pos_cpy.x + 2 * recout_x; + } + } + } else if (param.rotation == ROTATION_ANGLE_90) { + uint32_t temp_y = pos_cpy.y; + + pos_cpy.y = pipe_ctx->plane_res.scl_data.recout.height - pos_cpy.x; + pos_cpy.x = temp_y - prev_odm_width; + } else if (param.rotation == ROTATION_ANGLE_270) { + // Swap axis and mirror vertically + uint32_t temp_x = pos_cpy.x; + + int recout_height = + pipe_ctx->plane_res.scl_data.recout.height; + int recout_y = + pipe_ctx->plane_res.scl_data.recout.y; + + /** + * Display groups that are 1xnY, have pos_cpy.x > 2 * recout.height + * For pipe split cases: + * - apply offset of recout.y to normalize pos_cpy.x + * - calculate the pos_cpy.y as before + * - shift pos_cpy.y back by same offset to get final value + * - since we iterate through both pipes, use the lower + * recout.y for offset + * For non pipe split cases, use the same calculation for + * pos_cpy.y as the 180 degree rotation case below, + * but use pos_cpy.x as our input because we are rotating + * 270 degrees + */ + if (pipe_split_on || odm_combine_on) { + int pos_cpy_x_offset; + int other_pipe_recout_y; + + if (pipe_split_on) { + if (pipe_ctx->bottom_pipe) { + other_pipe_recout_y = + pipe_ctx->bottom_pipe->plane_res.scl_data.recout.y; + } else { + other_pipe_recout_y = + pipe_ctx->top_pipe->plane_res.scl_data.recout.y; + } + pos_cpy_x_offset = (recout_y > other_pipe_recout_y) ? 
+ other_pipe_recout_y : recout_y; + pos_cpy.x -= pos_cpy_x_offset; + if (pos_cpy.x > recout_height) { + pos_cpy.x = pos_cpy.x - recout_height; + pos_cpy.y = recout_height - pos_cpy.x; + } else { + pos_cpy.y = 2 * recout_height - pos_cpy.x; + } + pos_cpy.y += pos_cpy_x_offset; + + } else { + pos_cpy.x = pipe_ctx->plane_res.scl_data.recout.width + next_odm_width - pos_cpy.y; + pos_cpy.y = temp_x; + } + } else { + pos_cpy.x = pipe_ctx->plane_res.scl_data.recout.width - pos_cpy.y; + pos_cpy.y = temp_x; + } + } else if (param.rotation == ROTATION_ANGLE_180) { + // Mirror horizontally and vertically + int recout_width = + pipe_ctx->plane_res.scl_data.recout.width; + int recout_x = + pipe_ctx->plane_res.scl_data.recout.x; + + if (!param.mirror) { + if (odm_combine_on) { + pos_cpy.x = pipe_ctx->plane_res.scl_data.recout.width + next_odm_width - pos_cpy.x; + } else if (pipe_split_on) { + if (pos_cpy.x >= recout_width + recout_x) { + pos_cpy.x = 2 * recout_width + - pos_cpy.x + 2 * recout_x; + } else { + uint32_t temp_x = pos_cpy.x; + + pos_cpy.x = 2 * recout_x - pos_cpy.x; + if (temp_x >= recout_x + + (int)hubp->curs_attr.width || pos_cpy.x + <= (int)hubp->curs_attr.width + + pipe_ctx->plane_state->src_rect.x) { + pos_cpy.x = temp_x + recout_width; + } + } + } else { + pos_cpy.x = recout_width - pos_cpy.x + 2 * recout_x; + } + } + + /** + * Display groups that are 1xnY, have pos_cpy.y > recout.height + * Calculation: + * delta_from_bottom = recout.y + recout.height - pos_cpy.y + * pos_cpy.y_new = recout.y + delta_from_bottom + * Simplify it as: + * pos_cpy.y = recout.y * 2 + recout.height - pos_cpy.y + */ + pos_cpy.y = (2 * pipe_ctx->plane_res.scl_data.recout.y) + + pipe_ctx->plane_res.scl_data.recout.height - pos_cpy.y; + } + + hubp->funcs->set_cursor_position(hubp, &pos_cpy, ¶m); + dpp->funcs->set_cursor_position(dpp, &pos_cpy, ¶m, hubp->curs_attr.width, hubp->curs_attr.height); +} + +static bool dcn401_check_no_memory_request_for_cab(struct dc *dc) +{ + int i; + + /* First, check no-memory-request case */ + for (i = 0; i < dc->current_state->stream_count; i++) { + if ((dc->current_state->stream_status[i].plane_count) && + (dc->current_state->streams[i]->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED)) + /* Fail eligibility on a visible stream */ + return false; + } + + return true; +} + +static uint32_t dcn401_calculate_cab_allocation(struct dc *dc, struct dc_state *ctx) +{ + int i; + uint8_t num_ways = 0; + uint32_t mall_ss_size_bytes = 0; + + mall_ss_size_bytes = ctx->bw_ctx.bw.dcn.mall_ss_size_bytes; + // TODO add additional logic for PSR active stream exclusion optimization + // mall_ss_psr_active_size_bytes = ctx->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes; + + // Include cursor size for CAB allocation + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[i]; + + if (!pipe->stream || !pipe->plane_state) + continue; + + mall_ss_size_bytes += dcn32_helper_calculate_mall_bytes_for_cursor(dc, pipe, false); + } + + // Convert number of cache lines required to number of ways + if (dc->debug.force_mall_ss_num_ways > 0) + num_ways = dc->debug.force_mall_ss_num_ways; + else if (dc->res_pool->funcs->calculate_mall_ways_from_bytes) + num_ways = dc->res_pool->funcs->calculate_mall_ways_from_bytes(dc, mall_ss_size_bytes); + else + num_ways = 0; + + return num_ways; +} + +bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) +{ + union dmub_rb_cmd cmd; + uint8_t ways, i; + int j; + bool mall_ss_unsupported = false; + struct 
dc_plane_state *plane = NULL; + + if (!dc->ctx->dmub_srv || !dc->current_state) + return false; + + for (i = 0; i < dc->current_state->stream_count; i++) { + /* MALL SS messaging is not supported with PSR at this time */ + if (dc->current_state->streams[i] != NULL && + dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) + return false; + } + + memset(&cmd, 0, sizeof(cmd)); + cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS; + cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header); + + if (enable) { + if (dcn401_check_no_memory_request_for_cab(dc)) { + /* 1. Check no memory request case for CAB. + * If no memory request case, send CAB_ACTION NO_DF_REQ DMUB message + */ + cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ; + } else { + /* 2. Check if all surfaces can fit in CAB. + * If surfaces can fit into CAB, send CAB_ACTION_ALLOW DMUB message + * and configure HUBP's to fetch from MALL + */ + ways = dcn401_calculate_cab_allocation(dc, dc->current_state); + + /* MALL not supported with Stereo3D or TMZ surface. If any plane is using stereo, + * or TMZ surface, don't try to enter MALL. + */ + for (i = 0; i < dc->current_state->stream_count; i++) { + for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) { + plane = dc->current_state->stream_status[i].plane_states[j]; + + if (plane->address.type == PLN_ADDR_TYPE_GRPH_STEREO || + plane->address.tmz_surface) { + mall_ss_unsupported = true; + break; + } + } + if (mall_ss_unsupported) + break; + } + if (ways <= dc->caps.cache_num_ways && !mall_ss_unsupported) { + cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB; + cmd.cab.cab_alloc_ways = ways; + } else { + cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB; + } + } + } else { + /* Disable CAB */ + cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_IDLE_OPTIMIZATION; + } + + dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + + return true; +} + +void dcn401_prepare_bandwidth(struct dc *dc, + struct dc_state *context) +{ + struct hubbub *hubbub = dc->res_pool->hubbub; + bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; + unsigned int compbuf_size_kb = 0; + + /* Any transition into or out of a FAMS config should disable MCLK switching first to avoid hangs */ + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + dc->optimized_required = true; + context->bw_ctx.bw.dcn.clk.p_state_change_support = false; + } + + if (dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && + context->bw_ctx.bw.dcn.clk.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz); + + /* Increase clocks */ + dc->clk_mgr->funcs->update_clocks( + dc->clk_mgr, + context, + false); + + /* program dchubbub watermarks: + * For assigning wm_optimized_required, use |= operator since we don't want + * to clear the value if the optimize has not happened yet + */ + dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, + &context->bw_ctx.bw.dcn.watermarks, + dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, + false); + + /* decrease compbuf size */ + if (hubbub->funcs->program_compbuf_segments) { + compbuf_size_kb = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; + dc->wm_optimized_required |= 
(compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); + + hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size_kb, false); + } + + if (dc->debug.fams2_config.bits.enable) { + dcn401_fams2_global_control_lock(dc, context, true); + dcn401_fams2_update_config(dc, context, false); + dcn401_fams2_global_control_lock(dc, context, false); + } + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + /* After disabling P-State, restore the original value to ensure we get the correct P-State + * on the next optimize. */ + context->bw_ctx.bw.dcn.clk.p_state_change_support = p_state_change_support; + } +} + +void dcn401_optimize_bandwidth( + struct dc *dc, + struct dc_state *context) +{ + int i; + struct hubbub *hubbub = dc->res_pool->hubbub; + + /* enable fams2 if needed */ + if (dc->debug.fams2_config.bits.enable) { + dcn401_fams2_global_control_lock(dc, context, true); + dcn401_fams2_update_config(dc, context, true); + dcn401_fams2_global_control_lock(dc, context, false); + } + + /* program dchubbub watermarks */ + hubbub->funcs->program_watermarks(hubbub, + &context->bw_ctx.bw.dcn.watermarks, + dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, + true); + + if (dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr->clks.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && + context->bw_ctx.bw.dcn.clk.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->dc_mode_softmax_memclk); + + /* increase compbuf size */ + if (hubbub->funcs->program_compbuf_segments) + hubbub->funcs->program_compbuf_segments(hubbub, context->bw_ctx.bw.dcn.arb_regs.compbuf_size, true); + + dc->clk_mgr->funcs->update_clocks( + dc->clk_mgr, + context, + true); + if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) { + for (i = 0; i < dc->res_pool->pipe_count; ++i) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && pipe_ctx->plane_res.hubp->funcs->program_extended_blank + && pipe_ctx->stream->adjust.v_total_min == pipe_ctx->stream->adjust.v_total_max + && pipe_ctx->stream->adjust.v_total_max > pipe_ctx->stream->timing.v_total) + pipe_ctx->plane_res.hubp->funcs->program_extended_blank(pipe_ctx->plane_res.hubp, + pipe_ctx->dlg_regs.min_dst_y_next_start); + } + } +} + +void dcn401_fams2_global_control_lock(struct dc *dc, + struct dc_state *context, + bool lock) +{ + /* use always for now */ + union dmub_inbox0_cmd_lock_hw hw_lock_cmd = { 0 }; + + if (!dc->ctx || !dc->ctx->dmub_srv || !dc->debug.fams2_config.bits.enable) + return; + + hw_lock_cmd.bits.command_code = DMUB_INBOX0_CMD__HW_LOCK; + hw_lock_cmd.bits.hw_lock_client = HW_LOCK_CLIENT_DRIVER; + hw_lock_cmd.bits.lock = lock; + hw_lock_cmd.bits.should_release = !lock; + dmub_hw_lock_mgr_inbox0_cmd(dc->ctx->dmub_srv, hw_lock_cmd); +} + +void dcn401_fams2_global_control_lock_fast(union block_sequence_params *params) +{ + struct dc *dc = params->fams2_global_control_lock_fast_params.dc; + bool lock = params->fams2_global_control_lock_fast_params.lock; + + if (params->fams2_global_control_lock_fast_params.is_required) { + union dmub_inbox0_cmd_lock_hw hw_lock_cmd = { 0 }; + + hw_lock_cmd.bits.command_code = DMUB_INBOX0_CMD__HW_LOCK; + hw_lock_cmd.bits.hw_lock_client = HW_LOCK_CLIENT_DRIVER; + hw_lock_cmd.bits.lock = lock; + hw_lock_cmd.bits.should_release = !lock; + dmub_hw_lock_mgr_inbox0_cmd(dc->ctx->dmub_srv, hw_lock_cmd); + 
} +} + +void dcn401_fams2_update_config(struct dc *dc, struct dc_state *context, bool enable) +{ + bool fams2_required; + + if (!dc->ctx || !dc->ctx->dmub_srv || !dc->debug.fams2_config.bits.enable) + return; + + fams2_required = context->bw_ctx.bw.dcn.fams2_stream_count > 0; + + dc_dmub_srv_fams2_update_config(dc, context, enable && fams2_required); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h new file mode 100644 index 000000000000..e70ac1f6e68b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DC_HWSS_DCN401_H__ +#define __DC_HWSS_DCN401_H__ + +#include "inc/core_types.h" +#include "dc.h" +#include "dc_stream.h" +#include "hw_sequencer_private.h" +#include "dcn401/dcn401_dccg.h" + +struct dc; + +enum ips_ono_state { + ONO_ON = 0, + ONO_ON_IN_PROGRESS = 1, + ONO_OFF = 2, + ONO_OFF_IN_PROGRESS = 3 +}; + +struct ips_ono_region_state { + /** + * @desire_pwr_state: desired power state based on configured value + */ + uint32_t desire_pwr_state; + /** + * @current_pwr_state: current power gate status + */ + uint32_t current_pwr_state; +}; + +void dcn401_program_gamut_remap(struct pipe_ctx *pipe_ctx); + +void dcn401_init_hw(struct dc *dc); + +bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, + const struct dc_plane_state *plane_state); +bool dcn401_set_output_transfer_func(struct dc *dc, + struct pipe_ctx *pipe_ctx, + const struct dc_stream_state *stream); +void dcn401_calculate_dccg_tmds_div_value(struct pipe_ctx *pipe_ctx, + unsigned int *tmds_div); +enum dc_status dcn401_enable_stream_timing( + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct dc *dc); +void dcn401_enable_stream(struct pipe_ctx *pipe_ctx); +void dcn401_populate_mcm_luts(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_cm2_func_luts mcm_luts, + bool lut_bank_a); +void dcn401_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable); + +void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx); + +bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable); + +struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc, + uint8_t region); + +void dcn401_prepare_bandwidth(struct dc *dc, + struct dc_state *context); + +void dcn401_optimize_bandwidth( + struct dc *dc, + struct dc_state *context); + +void dcn401_fams2_global_control_lock(struct dc *dc, + struct dc_state *context, + bool lock); +void dcn401_fams2_update_config(struct dc *dc, struct dc_state *context, bool enable); +void dcn401_fams2_global_control_lock_fast(union block_sequence_params *params); + +#endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c new file mode 100644 index 000000000000..c051c1cd0665 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
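+//
+// dcn401_init.c installs the DCN 4.0.1 hardware sequencer: dcn401_hw_sequencer_init_functions()
+// points dc->hwss and dc->hwseq->funcs at the dcn401_funcs and dcn401_private_funcs tables below,
+// reusing DCE110/DCN1x/DCN2x/DCN3x helpers wherever DCN 4.0.1 needs no new behavior.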
+ +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dcn31/dcn31_hwseq.h" +#include "dcn32/dcn32_hwseq.h" +#include "dcn401/dcn401_hwseq.h" +#include "dcn401_init.h" + +static const struct hw_sequencer_funcs dcn401_funcs = { + .program_gamut_remap = dcn401_program_gamut_remap, + .init_hw = dcn401_init_hw, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .update_plane_addr = dcn20_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dcn31_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dcn401_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn32_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn20_disable_plane, + .pipe_control_lock = dcn20_pipe_control_lock, + .interdependent_update_lock = dcn32_interdependent_update_lock, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn401_prepare_bandwidth, + .optimize_bandwidth = dcn401_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn31_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dcn30_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .edp_wait_for_T12 = dce110_edp_wait_for_T12, + .set_cursor_position = dcn401_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .enable_writeback = dcn30_enable_writeback, + .disable_writeback = dcn30_disable_writeback, + .update_writeback = dcn30_update_writeback, + .mmhubbub_warmup = dcn30_mmhubbub_warmup, + .dmdata_status_done = dcn20_dmdata_status_done, + .program_dmdata_engine = dcn30_program_dmdata_engine, + .set_dmdata_attributes = dcn20_set_dmdata_attributes, + .init_sys_ctx = dcn20_init_sys_ctx, + .init_vm_ctx = dcn20_init_vm_ctx, + .set_flip_control_gsl = dcn20_set_flip_control_gsl, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .apply_idle_power_optimizations = dcn401_apply_idle_power_optimizations, + .does_plane_fit_in_mall = NULL, + .set_backlight_level = 
dcn21_set_backlight_level, + .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, + .hardware_release = dcn30_hardware_release, + .set_pipe = dcn21_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dcn32_disable_link_output, + .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .get_dcc_en_bits = dcn10_get_dcc_en_bits, + .enable_phantom_streams = dcn32_enable_phantom_streams, + .disable_phantom_streams = dcn32_disable_phantom_streams, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .update_phantom_vp_position = dcn32_update_phantom_vp_position, + .update_dsc_pg = dcn32_update_dsc_pg, + .apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom, + .blank_phantom = dcn32_blank_phantom, + .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, + .fams2_global_control_lock = dcn401_fams2_global_control_lock, + .fams2_update_config = dcn401_fams2_update_config, + .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, +}; + +static const struct hwseq_private_funcs dcn401_private_funcs = { + .init_pipes = dcn10_init_pipes, + .update_plane_addr = dcn20_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .update_mpcc = dcn20_update_mpcc, + .set_input_transfer_func = dcn32_set_input_transfer_func, + .set_output_transfer_func = dcn401_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, + .enable_stream_timing = dcn401_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .disable_stream_gating = dcn20_disable_stream_gating, + .enable_stream_gating = dcn20_enable_stream_gating, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn32_init_blank, + .disable_vga = dcn20_disable_vga, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn20_plane_atomic_disable, + .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .enable_power_gating_plane = dcn32_enable_power_gating_plane, + .hubp_pg_control = dcn32_hubp_pg_control, + .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, + .update_odm = dcn32_update_odm, + .dsc_pg_control = dcn32_dsc_pg_control, + .dsc_pg_status = dcn32_dsc_pg_status, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_mcm_luts = dcn401_set_mcm_luts, + .program_mall_pipe_config = dcn32_program_mall_pipe_config, + .update_force_pstate = dcn32_update_force_pstate, + .update_mall_sel = dcn32_update_mall_sel, + .setup_hpo_hw_control = dcn401_setup_hpo_hw_control, + .calculate_dccg_k1_k2_values = NULL, + .set_pixels_per_cycle = dcn32_set_pixels_per_cycle, + .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, + .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, + .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, + .populate_mcm_luts = dcn401_populate_mcm_luts, +}; + +void dcn401_hw_sequencer_init_functions(struct dc *dc) +{ + dc->hwss = dcn401_funcs; 
+ dc->hwseq->funcs = dcn401_private_funcs; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.h new file mode 100644 index 000000000000..59e6d8525e19 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DC_DCN401_INIT_H__ +#define __DC_DCN401_INIT_H__ + +struct dc; + +void dcn401_hw_sequencer_init_functions(struct dc *dc); + +#endif /* __DC_DCN401_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c new file mode 100644 index 000000000000..0b3d4616b774 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dm_services.h" +#include "include/logger_interface.h" +#include "../dce110/irq_service_dce110.h" + +#include "dcn/dcn_3_2_0_offset.h" +#include "dcn/dcn_3_2_0_sh_mask.h" + +#include "irq_service_dcn401.h" + +#include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" + +#define DCN_BASE__INST0_SEG2 0x000034C0 + +static enum dc_irq_source to_dal_irq_source_dcn401( + struct irq_service *irq_service, + uint32_t src_id, + uint32_t ext_id) +{ + switch (src_id) { + case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP: + return DC_IRQ_SOURCE_VBLANK1; + case DCN_1_0__SRCID__DC_D2_OTG_VSTARTUP: + return DC_IRQ_SOURCE_VBLANK2; + case DCN_1_0__SRCID__DC_D3_OTG_VSTARTUP: + return DC_IRQ_SOURCE_VBLANK3; + case DCN_1_0__SRCID__DC_D4_OTG_VSTARTUP: + return DC_IRQ_SOURCE_VBLANK4; + case DCN_1_0__SRCID__DC_D5_OTG_VSTARTUP: + return DC_IRQ_SOURCE_VBLANK5; + case DCN_1_0__SRCID__DC_D6_OTG_VSTARTUP: + return DC_IRQ_SOURCE_VBLANK6; + case DCN_1_0__SRCID__OTG1_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC1_VLINE0; + case DCN_1_0__SRCID__OTG2_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC2_VLINE0; + case DCN_1_0__SRCID__OTG3_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC3_VLINE0; + case DCN_1_0__SRCID__OTG4_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC4_VLINE0; + case DCN_1_0__SRCID__OTG5_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC5_VLINE0; + case DCN_1_0__SRCID__OTG6_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC6_VLINE0; + case DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT: + return DC_IRQ_SOURCE_PFLIP1; + case DCN_1_0__SRCID__HUBP1_FLIP_INTERRUPT: + return DC_IRQ_SOURCE_PFLIP2; + case DCN_1_0__SRCID__HUBP2_FLIP_INTERRUPT: + return DC_IRQ_SOURCE_PFLIP3; + case DCN_1_0__SRCID__HUBP3_FLIP_INTERRUPT: + return DC_IRQ_SOURCE_PFLIP4; + case DCN_1_0__SRCID__HUBP4_FLIP_INTERRUPT: + return DC_IRQ_SOURCE_PFLIP5; + case DCN_1_0__SRCID__HUBP5_FLIP_INTERRUPT: + return DC_IRQ_SOURCE_PFLIP6; + case DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE1; + case DCN_1_0__SRCID__OTG1_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE2; + case DCN_1_0__SRCID__OTG2_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE3; + case DCN_1_0__SRCID__OTG3_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE4; + case DCN_1_0__SRCID__OTG4_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE5; + case DCN_1_0__SRCID__OTG5_IHC_V_UPDATE_NO_LOCK_INTERRUPT: + return DC_IRQ_SOURCE_VUPDATE6; + case DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT: + return DC_IRQ_SOURCE_DMCUB_OUTBOX; + + case 
DCN_1_0__SRCID__DC_HPD1_INT: + /* generic src_id for all HPD and HPDRX interrupts */ + switch (ext_id) { + case DCN_1_0__CTXID__DC_HPD1_INT: + return DC_IRQ_SOURCE_HPD1; + case DCN_1_0__CTXID__DC_HPD2_INT: + return DC_IRQ_SOURCE_HPD2; + case DCN_1_0__CTXID__DC_HPD3_INT: + return DC_IRQ_SOURCE_HPD3; + case DCN_1_0__CTXID__DC_HPD4_INT: + return DC_IRQ_SOURCE_HPD4; + case DCN_1_0__CTXID__DC_HPD5_INT: + return DC_IRQ_SOURCE_HPD5; + case DCN_1_0__CTXID__DC_HPD6_INT: + return DC_IRQ_SOURCE_HPD6; + case DCN_1_0__CTXID__DC_HPD1_RX_INT: + return DC_IRQ_SOURCE_HPD1RX; + case DCN_1_0__CTXID__DC_HPD2_RX_INT: + return DC_IRQ_SOURCE_HPD2RX; + case DCN_1_0__CTXID__DC_HPD3_RX_INT: + return DC_IRQ_SOURCE_HPD3RX; + case DCN_1_0__CTXID__DC_HPD4_RX_INT: + return DC_IRQ_SOURCE_HPD4RX; + case DCN_1_0__CTXID__DC_HPD5_RX_INT: + return DC_IRQ_SOURCE_HPD5RX; + case DCN_1_0__CTXID__DC_HPD6_RX_INT: + return DC_IRQ_SOURCE_HPD6RX; + default: + return DC_IRQ_SOURCE_INVALID; + } + break; + + default: + return DC_IRQ_SOURCE_INVALID; + } +} + +static bool hpd_ack( + struct irq_service *irq_service, + const struct irq_source_info *info) +{ + uint32_t addr = info->status_reg; + uint32_t value = dm_read_reg(irq_service->ctx, addr); + uint32_t current_status = + get_reg_field_value( + value, + HPD0_DC_HPD_INT_STATUS, + DC_HPD_SENSE_DELAYED); + + dal_irq_service_ack_generic(irq_service, info); + + value = dm_read_reg(irq_service->ctx, info->enable_reg); + + set_reg_field_value( + value, + current_status ? 0 : 1, + HPD0_DC_HPD_INT_CONTROL, + DC_HPD_INT_POLARITY); + + dm_write_reg(irq_service->ctx, info->enable_reg, value); + + return true; +} + +static struct irq_source_info_funcs hpd_irq_info_funcs = { + .set = NULL, + .ack = hpd_ack +}; + +static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +static struct irq_source_info_funcs pflip_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +static struct irq_source_info_funcs vblank_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +static struct irq_source_info_funcs outbox_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +static struct irq_source_info_funcs vline0_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +#undef BASE_INNER +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +/* compile time expand base address. 
*/ +#define BASE(seg) \ + BASE_INNER(seg) + +#define SRI(reg_name, block, id)\ + BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI_DMUB(reg_name)\ + BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define IRQ_REG_ENTRY(block, reg_num, reg1, mask1, reg2, mask2)\ + .enable_reg = SRI(reg1, block, reg_num),\ + .enable_mask = \ + block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK,\ + .enable_value = {\ + block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK,\ + ~block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK \ + },\ + .ack_reg = SRI(reg2, block, reg_num),\ + .ack_mask = \ + block ## reg_num ## _ ## reg2 ## __ ## mask2 ## _MASK,\ + .ack_value = \ + block ## reg_num ## _ ## reg2 ## __ ## mask2 ## _MASK \ + +#define IRQ_REG_ENTRY_DMUB(reg1, mask1, reg2, mask2)\ + .enable_reg = SRI_DMUB(reg1),\ + .enable_mask = \ + reg1 ## __ ## mask1 ## _MASK,\ + .enable_value = {\ + reg1 ## __ ## mask1 ## _MASK,\ + ~reg1 ## __ ## mask1 ## _MASK \ + },\ + .ack_reg = SRI_DMUB(reg2),\ + .ack_mask = \ + reg2 ## __ ## mask2 ## _MASK,\ + .ack_value = \ + reg2 ## __ ## mask2 ## _MASK \ + +#define hpd_int_entry(reg_num)\ + [DC_IRQ_SOURCE_HPD1 + reg_num] = {\ + IRQ_REG_ENTRY(HPD, reg_num,\ + DC_HPD_INT_CONTROL, DC_HPD_INT_EN,\ + DC_HPD_INT_CONTROL, DC_HPD_INT_ACK),\ + .status_reg = SRI(DC_HPD_INT_STATUS, HPD, reg_num),\ + .funcs = &hpd_irq_info_funcs\ + } + +#define hpd_rx_int_entry(reg_num)\ + [DC_IRQ_SOURCE_HPD1RX + reg_num] = {\ + IRQ_REG_ENTRY(HPD, reg_num,\ + DC_HPD_INT_CONTROL, DC_HPD_RX_INT_EN,\ + DC_HPD_INT_CONTROL, DC_HPD_RX_INT_ACK),\ + .status_reg = SRI(DC_HPD_INT_STATUS, HPD, reg_num),\ + .funcs = &hpd_rx_irq_info_funcs\ + } +#define pflip_int_entry(reg_num)\ + [DC_IRQ_SOURCE_PFLIP1 + reg_num] = {\ + IRQ_REG_ENTRY(HUBPREQ, reg_num,\ + DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_INT_MASK,\ + DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_CLEAR),\ + .funcs = &pflip_irq_info_funcs\ + } + +/* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic + * of DCE's DC_IRQ_SOURCE_VUPDATEx. 
+ */ +#define vupdate_no_lock_int_entry(reg_num)\ + [DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_INT_EN,\ + OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_EVENT_CLEAR),\ + .funcs = &vupdate_no_lock_irq_info_funcs\ + } + +#define vblank_int_entry(reg_num)\ + [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ + .funcs = &vblank_irq_info_funcs\ + } +#define vline0_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\ + .funcs = &vline0_irq_info_funcs\ + } +#define dmub_outbox_int_entry()\ + [DC_IRQ_SOURCE_DMCUB_OUTBOX] = {\ + IRQ_REG_ENTRY_DMUB(\ + DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX1_READY_INT_EN,\ + DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX1_READY_INT_ACK),\ + .funcs = &outbox_irq_info_funcs\ + } + +#define dummy_irq_entry() \ + {\ + .funcs = &dummy_irq_info_funcs\ + } + +#define i2c_int_entry(reg_num) \ + [DC_IRQ_SOURCE_I2C_DDC ## reg_num] = dummy_irq_entry() + +#define dp_sink_int_entry(reg_num) \ + [DC_IRQ_SOURCE_DPSINK ## reg_num] = dummy_irq_entry() + +#define gpio_pad_int_entry(reg_num) \ + [DC_IRQ_SOURCE_GPIOPAD ## reg_num] = dummy_irq_entry() + +#define dc_underflow_int_entry(reg_num) \ + [DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry() + +static struct irq_source_info_funcs dummy_irq_info_funcs = { + .set = dal_irq_service_dummy_set, + .ack = dal_irq_service_dummy_ack +}; + +static const struct irq_source_info +irq_source_info_dcn401[DAL_IRQ_SOURCES_NUMBER] = { + [DC_IRQ_SOURCE_INVALID] = dummy_irq_entry(), + hpd_int_entry(0), + hpd_int_entry(1), + hpd_int_entry(2), + hpd_int_entry(3), + hpd_int_entry(4), + hpd_rx_int_entry(0), + hpd_rx_int_entry(1), + hpd_rx_int_entry(2), + hpd_rx_int_entry(3), + hpd_rx_int_entry(4), + i2c_int_entry(1), + i2c_int_entry(2), + i2c_int_entry(3), + i2c_int_entry(4), + i2c_int_entry(5), + i2c_int_entry(6), + dp_sink_int_entry(1), + dp_sink_int_entry(2), + dp_sink_int_entry(3), + dp_sink_int_entry(4), + dp_sink_int_entry(5), + dp_sink_int_entry(6), + [DC_IRQ_SOURCE_TIMER] = dummy_irq_entry(), + pflip_int_entry(0), + pflip_int_entry(1), + pflip_int_entry(2), + pflip_int_entry(3), + [DC_IRQ_SOURCE_PFLIP5] = dummy_irq_entry(), + [DC_IRQ_SOURCE_PFLIP6] = dummy_irq_entry(), + [DC_IRQ_SOURCE_PFLIP_UNDERLAY0] = dummy_irq_entry(), + gpio_pad_int_entry(0), + gpio_pad_int_entry(1), + gpio_pad_int_entry(2), + gpio_pad_int_entry(3), + gpio_pad_int_entry(4), + gpio_pad_int_entry(5), + gpio_pad_int_entry(6), + gpio_pad_int_entry(7), + gpio_pad_int_entry(8), + gpio_pad_int_entry(9), + gpio_pad_int_entry(10), + gpio_pad_int_entry(11), + gpio_pad_int_entry(12), + gpio_pad_int_entry(13), + gpio_pad_int_entry(14), + gpio_pad_int_entry(15), + gpio_pad_int_entry(16), + gpio_pad_int_entry(17), + gpio_pad_int_entry(18), + gpio_pad_int_entry(19), + gpio_pad_int_entry(20), + gpio_pad_int_entry(21), + gpio_pad_int_entry(22), + gpio_pad_int_entry(23), + gpio_pad_int_entry(24), + gpio_pad_int_entry(25), + gpio_pad_int_entry(26), + gpio_pad_int_entry(27), + gpio_pad_int_entry(28), + gpio_pad_int_entry(29), + gpio_pad_int_entry(30), + dc_underflow_int_entry(1), + dc_underflow_int_entry(2), + dc_underflow_int_entry(3), + dc_underflow_int_entry(4), + dc_underflow_int_entry(5), + dc_underflow_int_entry(6), + 
[DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), + [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), + vupdate_no_lock_int_entry(0), + vupdate_no_lock_int_entry(1), + vupdate_no_lock_int_entry(2), + vupdate_no_lock_int_entry(3), + vblank_int_entry(0), + vblank_int_entry(1), + vblank_int_entry(2), + vblank_int_entry(3), + vline0_int_entry(0), + vline0_int_entry(1), + vline0_int_entry(2), + vline0_int_entry(3), + [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), + [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), + dmub_outbox_int_entry(), +}; + +static const struct irq_service_funcs irq_service_funcs_dcn401 = { + .to_dal_irq_source = to_dal_irq_source_dcn401 +}; + +static void dcn401_irq_construct( + struct irq_service *irq_service, + struct irq_service_init_data *init_data) +{ + dal_irq_service_construct(irq_service, init_data); + + irq_service->info = irq_source_info_dcn401; + irq_service->funcs = &irq_service_funcs_dcn401; +} + +struct irq_service *dal_irq_service_dcn401_create( + struct irq_service_init_data *init_data) +{ + struct irq_service *irq_service = kzalloc(sizeof(*irq_service), + GFP_KERNEL); + + if (!irq_service) + return NULL; + + dcn401_irq_construct(irq_service, init_data); + return irq_service; +} diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.h b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.h new file mode 100644 index 000000000000..221959aa6fc7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DAL_IRQ_SERVICE_DCN401_H__ +#define __DAL_IRQ_SERVICE_DCN401_H__ + +#include "../irq_service.h" + +struct irq_service *dal_irq_service_dcn401_create( + struct irq_service_init_data *init_data); + +#endif /* __DAL_IRQ_SERVICE_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c new file mode 100644 index 000000000000..3c7b0624acea --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dcn401_optc.h" +#include "dcn30/dcn30_optc.h" +#include "dcn31/dcn31_optc.h" +#include "dcn32/dcn32_optc.h" +#include "reg_helper.h" +#include "dc.h" +#include "dcn_calc_math.h" +#include "dc_dmub_srv.h" + +#define REG(reg)\ + optc1->tg_regs->reg + +#define CTX \ + optc1->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + optc1->tg_shift->field_name, optc1->tg_mask->field_name + +/* + * OPTC uses ODM_MEM sub block to merge pixel data coming from different OPPs + * into unified memory location per horizontal line. ODM_MEM contains shared + * memory resources global to the ASIC. Each memory resource is capable of + * storing 2048 pixels independent from actual pixel data size. Total number of + * memory allocated must be even. The memory resource allocation is described in + * a memory bit map per OPTC instance. Driver has to make sure that there is no + * double allocation across different OPTC instances. Bit offset in the map + * represents memory instance id. Driver allocates a memory instance to the + * current OPTC by setting the bit with offset associated with the desired + * memory instance to 1 in the current OPTC memory map register. + * + * It is upto software to decide how to allocate the shared memory resources + * across different OPTC instances. 
Driver understands that the total number + * of memory available is always 2 times the max number of OPP pipes. So each + * OPP pipe can be mapped 2 pieces of memory. However there exists cases such as + * 11520x2160 which could use 6 pieces of memory for 2 OPP pipes i.e. 3 pieces + * for each OPP pipe. + * + * Driver will reserve the first and second preferred memory instances for each + * OPP pipe. For example, OPP0's first and second preferred memory is ODM_MEM0 + * and ODM_MEM1. OPP1's first and second preferred memory is ODM_MEM2 and + * ODM_MEM3 so on so forth. + * + * Driver will first allocate from first preferred memory instances associated + * with current OPP pipes in use. If needed driver will then allocate from + * second preferred memory instances associated with current OPP pipes in use. + * Finally if still needed, driver will allocate from second preferred memory + * instances not associated with current OPP pipes. So if memory instances are + * enough other OPTCs can still allocate from their OPPs' first preferred memory + * instances without worrying about double allocation. + */ + +static uint32_t decide_odm_mem_bit_map(int *opp_id, int opp_cnt, int h_active) +{ + bool first_preferred_memory_for_opp[MAX_PIPES] = {0}; + bool second_preferred_memory_for_opp[MAX_PIPES] = {0}; + uint32_t memory_bit_map = 0; + int total_required = ((h_active + 4095) / 4096) * 2; + int total_allocated = 0; + int i; + + for (i = 0; i < opp_cnt; i++) { + first_preferred_memory_for_opp[opp_id[i]] = true; + total_allocated++; + if (total_required == total_allocated) + break; + } + + if (total_required > total_allocated) { + for (i = 0; i < opp_cnt; i++) { + second_preferred_memory_for_opp[opp_id[i]] = true; + total_allocated++; + if (total_required == total_allocated) + break; + } + } + + if (total_required > total_allocated) { + for (i = 0; i < MAX_PIPES; i++) { + if (second_preferred_memory_for_opp[i] == false) { + second_preferred_memory_for_opp[i] = true; + total_allocated++; + if (total_required == total_allocated) + break; + } + } + } + ASSERT(total_required == total_allocated); + + for (i = 0; i < MAX_PIPES; i++) { + if (first_preferred_memory_for_opp[i]) + memory_bit_map |= 0x1 << (i * 2); + if (second_preferred_memory_for_opp[i]) + memory_bit_map |= 0x2 << (i * 2); + } + + return memory_bit_map; +} + +static void optc401_set_odm_combine(struct timing_generator *optc, int *opp_id, + int opp_cnt, struct dc_crtc_timing *timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t h_active = timing->h_addressable + + timing->h_border_left + timing->h_border_right; + uint32_t odm_segment_width = h_active / opp_cnt; + uint32_t odm_segment_width_last = + h_active - odm_segment_width * (opp_cnt - 1); + uint32_t odm_mem_bit_map = decide_odm_mem_bit_map( + opp_id, opp_cnt, h_active); + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, odm_mem_bit_map); + + switch (opp_cnt) { + case 2: /* ODM Combine 2:1 */ + REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 1, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1]); + REG_UPDATE(OPTC_WIDTH_CONTROL, + OPTC_SEGMENT_WIDTH, odm_segment_width); + + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, H_TIMING_DIV_BY2); + break; + case 3: /* ODM Combine 3:1 */ + REG_SET_4(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 2, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1], + OPTC_SEG2_SRC_SEL, opp_id[2]); + REG_UPDATE(OPTC_WIDTH_CONTROL, + OPTC_SEGMENT_WIDTH, odm_segment_width); + 
REG_UPDATE(OPTC_WIDTH_CONTROL2, + OPTC_SEGMENT_WIDTH_LAST, + odm_segment_width_last); + /* In ODM combine 3:1 mode ODM packs 4 pixels per data transfer + * so OTG_H_TIMING_DIV_MODE should be configured to + * H_TIMING_DIV_BY4 even though ODM combines 3 OPP inputs, it + * outputs 4 pixels from single OPP at a time. + */ + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, H_TIMING_DIV_BY4); + break; + case 4: /* ODM Combine 4:1 */ + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 3, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1], + OPTC_SEG2_SRC_SEL, opp_id[2], + OPTC_SEG3_SRC_SEL, opp_id[3]); + REG_UPDATE(OPTC_WIDTH_CONTROL, + OPTC_SEGMENT_WIDTH, odm_segment_width); + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, H_TIMING_DIV_BY4); + break; + default: + ASSERT(false); + } +; + optc1->opp_count = opp_cnt; +} + +static void optc401_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE_MANUAL, manual_mode ? 1 : 0); +} +/** + * Enable CRTC + * Enable CRTC - call ASIC Control Object to enable Timing generator. + */ +static bool optc401_enable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* opp instance for OTG, 1 to 1 mapping and odm will adjust */ + REG_UPDATE(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, optc->inst); + + /* VTG enable first is for HW workaround */ + REG_UPDATE(CONTROL, + VTG0_ENABLE, 1); + + REG_SEQ_START(); + + /* Enable CRTC */ + REG_UPDATE_2(OTG_CONTROL, + OTG_DISABLE_POINT_CNTL, 2, + OTG_MASTER_EN, 1); + + REG_SEQ_SUBMIT(); + REG_SEQ_WAIT_DONE(); + + return true; +} + +/* disable_crtc */ +static bool optc401_disable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, 0xf, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf, + OPTC_NUM_OF_INPUT_SEGMENT, 0); + + REG_UPDATE(OPTC_MEMORY_CONFIG, + OPTC_MEM_SEL, 0); + + /* disable otg request until end of the first line + * in the vertical blank region + */ + REG_UPDATE(OTG_CONTROL, + OTG_MASTER_EN, 0); + + REG_UPDATE(CONTROL, + VTG0_ENABLE, 0); + + /* CRTC disabled, so disable clock. */ + REG_WAIT(OTG_CLOCK_CONTROL, + OTG_BUSY, 0, + 1, 150000); + + return true; +} + +static void optc401_phantom_crtc_post_enable(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* Disable immediately. */ + REG_UPDATE_2(OTG_CONTROL, OTG_DISABLE_POINT_CNTL, 0, OTG_MASTER_EN, 0); + + /* CRTC disabled, so disable clock. 
*/ + REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); +} + +static void optc401_disable_phantom_otg(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, 0xf, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf, + OPTC_NUM_OF_INPUT_SEGMENT, 0); + + REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); +} + +static void optc401_set_odm_bypass(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + enum h_timing_div_mode h_div = H_TIMING_NO_DIV; + + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 0, + OPTC_SEG0_SRC_SEL, optc->inst, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf + ); + + h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing); + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, h_div); + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, 0); + optc1->opp_count = 1; +} + +/* only to be used when FAMS2 is disabled or unsupported */ +void optc401_setup_manual_trigger(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + struct dc *dc = optc->ctx->dc; + + if (dc->caps.dmub_caps.fams_ver == 1 && !dc->debug.disable_fams) + /* FAMS */ + dc_dmub_srv_set_drr_manual_trigger_cmd(dc, optc->inst); + else { + /* + * MIN_MASK_EN is gone and MASK is now always enabled. + * + * To get it to it work with manual trigger we need to make sure + * we program the correct bit. + */ + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_FORCE_LOCK_ON_EVENT, 0, + OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ + } +} + +void optc401_set_drr( + struct timing_generator *optc, + const struct drr_params *params) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + struct dc *dc = optc->ctx->dc; + struct drr_params amended_params = { 0 }; + bool program_manual_trigger = false; + + if (dc->caps.dmub_caps.fams_ver >= 2 && dc->debug.fams2_config.bits.enable) { + if (params != NULL && + params->vertical_total_max > 0 && + params->vertical_total_min > 0) { + amended_params.vertical_total_max = params->vertical_total_max - 1; + amended_params.vertical_total_min = params->vertical_total_min - 1; + if (params->vertical_total_mid != 0) { + amended_params.vertical_total_mid = params->vertical_total_mid - 1; + amended_params.vertical_total_mid_frame_num = params->vertical_total_mid_frame_num; + } + program_manual_trigger = true; + } + + dc_dmub_srv_fams2_drr_update(dc, optc->inst, + amended_params.vertical_total_min, + amended_params.vertical_total_max, + amended_params.vertical_total_mid, + amended_params.vertical_total_mid_frame_num, + program_manual_trigger); + } else { + if (params != NULL && + params->vertical_total_max > 0 && + params->vertical_total_min > 0) { + + if (params->vertical_total_mid != 0) { + + REG_SET(OTG_V_TOTAL_MID, 0, + OTG_V_TOTAL_MID, params->vertical_total_mid - 1); + + REG_UPDATE_2(OTG_V_TOTAL_CONTROL, + OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, + OTG_VTOTAL_MID_FRAME_NUM, + (uint8_t)params->vertical_total_mid_frame_num); + + } + + optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); + optc401_setup_manual_trigger(optc); + } else { + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_SET_V_TOTAL_MIN_MASK, 0, + OTG_V_TOTAL_MIN_SEL, 0, + OTG_V_TOTAL_MAX_SEL, 0, + OTG_FORCE_LOCK_ON_EVENT, 0); + + optc->funcs->set_vtotal_min_max(optc, 0, 0); + } + } +} + +static void 
optc401_set_out_mux(struct timing_generator *optc, enum otg_out_mux_dest dest) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* 00 - OTG_CONTROL_OTG_OUT_MUX_0 : Connects to DIO. + 01 - OTG_CONTROL_OTG_OUT_MUX_1 : Reserved. + 02 - OTG_CONTROL_OTG_OUT_MUX_2 : Connects to HPO. + */ + REG_UPDATE(OTG_CONTROL, OTG_OUT_MUX, dest); +} + +void optc401_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max) +{ + struct dc *dc = optc->ctx->dc; + + if (dc->caps.dmub_caps.fams_ver >= 2 && dc->debug.fams2_config.bits.enable) { + /* FAMS2 */ + dc_dmub_srv_fams2_drr_update(dc, optc->inst, + vtotal_min, + vtotal_max, + 0, + 0, + false); + } else if (dc->caps.dmub_caps.fams_ver == 1 && !dc->debug.disable_fams) { + /* FAMS */ + dc_dmub_srv_drr_update_cmd(dc, optc->inst, vtotal_min, vtotal_max); + } else { + optc1_set_vtotal_min_max(optc, vtotal_min, vtotal_max); + } +} + +static struct timing_generator_funcs dcn401_tg_funcs = { + .validate_timing = optc1_validate_timing, + .program_timing = optc1_program_timing, + .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, + .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, + .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, + .program_global_sync = optc1_program_global_sync, + .enable_crtc = optc401_enable_crtc, + .disable_crtc = optc401_disable_crtc, + .phantom_crtc_post_enable = optc401_phantom_crtc_post_enable, + .disable_phantom_crtc = optc401_disable_phantom_otg, + /* used by enable_timing_synchronization. Not need for FPGA */ + .is_counter_moving = optc1_is_counter_moving, + .get_position = optc1_get_position, + .get_frame_count = optc1_get_vblank_counter, + .get_scanoutpos = optc1_get_crtc_scanoutpos, + .get_otg_active_size = optc1_get_otg_active_size, + .set_early_control = optc1_set_early_control, + /* used by enable_timing_synchronization. 
Not need for FPGA */ + .wait_for_state = optc1_wait_for_state, + .set_blank_color = optc3_program_blank_color, + .did_triggered_reset_occur = optc1_did_triggered_reset_occur, + .triplebuffer_lock = optc3_triplebuffer_lock, + .triplebuffer_unlock = optc2_triplebuffer_unlock, + .enable_reset_trigger = optc1_enable_reset_trigger, + .enable_crtc_reset = optc1_enable_crtc_reset, + .disable_reset_trigger = optc1_disable_reset_trigger, + .lock = optc3_lock, + .unlock = optc1_unlock, + .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, + .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, + .enable_optc_clock = optc1_enable_optc_clock, + .set_drr = optc401_set_drr, + .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, + .set_vtotal_min_max = optc3_set_vtotal_min_max, + .set_static_screen_control = optc1_set_static_screen_control, + .program_stereo = optc1_program_stereo, + .is_stereo_left_eye = optc1_is_stereo_left_eye, + .tg_init = optc3_tg_init, + .is_tg_enabled = optc1_is_tg_enabled, + .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, + .clear_optc_underflow = optc1_clear_optc_underflow, + .setup_global_swap_lock = NULL, + .get_crc = optc1_get_crc, + .configure_crc = optc1_configure_crc, + .set_dsc_config = optc3_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, + .set_dwb_source = NULL, + .set_odm_bypass = optc401_set_odm_bypass, + .set_odm_combine = optc401_set_odm_combine, + .set_h_timing_div_manual_mode = optc401_set_h_timing_div_manual_mode, + .get_optc_source = optc2_get_optc_source, + .set_out_mux = optc401_set_out_mux, + .set_drr_trigger_window = optc3_set_drr_trigger_window, + .set_vtotal_change_limit = optc3_set_vtotal_change_limit, + .set_gsl = optc2_set_gsl, + .set_gsl_source_select = optc2_set_gsl_source_select, + .set_vtg_params = optc1_set_vtg_params, + .program_manual_trigger = optc2_program_manual_trigger, + .setup_manual_trigger = optc2_setup_manual_trigger, + .get_hw_timing = optc1_get_hw_timing, +}; + +void dcn401_timing_generator_init(struct optc *optc1) +{ + optc1->base.funcs = &dcn401_tg_funcs; + + optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; + optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; + + optc1->min_h_blank = 32; + optc1->min_v_blank = 3; + optc1->min_v_blank_interlace = 5; + optc1->min_h_sync_width = 4; + optc1->min_v_sync_width = 1; +} + diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h new file mode 100644 index 000000000000..1671fdd5061c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
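The first-preferred/second-preferred walk implemented by decide_odm_mem_bit_map() in dcn401_optc.c above is easier to follow with concrete numbers. The stand-alone sketch below is illustrative only and is not part of these sources: it mirrors that walk for the 11520-pixel-wide, two-OPP case called out in the code comment, with EXAMPLE_MAX_PIPES as an assumed stand-in for dc's MAX_PIPES.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_MAX_PIPES 6 /* assumed stand-in for MAX_PIPES */

/* Mirrors decide_odm_mem_bit_map(): each ODM_MEM instance holds 2048 pixels,
 * so one horizontal line needs ((h_active + 4095) / 4096) * 2 instances. */
static uint32_t example_odm_mem_bit_map(const int *opp_id, int opp_cnt, int h_active)
{
	bool first[EXAMPLE_MAX_PIPES] = { false };
	bool second[EXAMPLE_MAX_PIPES] = { false };
	uint32_t map = 0;
	int required = ((h_active + 4095) / 4096) * 2;
	int allocated = 0;
	int i;

	/* 1) first preferred memory of each OPP in use */
	for (i = 0; i < opp_cnt && allocated < required; i++, allocated++)
		first[opp_id[i]] = true;
	/* 2) second preferred memory of each OPP in use */
	for (i = 0; i < opp_cnt && allocated < required; i++, allocated++)
		second[opp_id[i]] = true;
	/* 3) borrow the second preferred memory of OPPs not in use */
	for (i = 0; i < EXAMPLE_MAX_PIPES && allocated < required; i++)
		if (!second[i]) {
			second[i] = true;
			allocated++;
		}

	for (i = 0; i < EXAMPLE_MAX_PIPES; i++) {
		if (first[i])
			map |= 0x1 << (i * 2);	/* ODM_MEM(2*i)     */
		if (second[i])
			map |= 0x2 << (i * 2);	/* ODM_MEM(2*i + 1) */
	}
	return map;
}

int main(void)
{
	int opp_id[2] = { 0, 1 };

	/* 11520 pixels wide -> 6 instances: OPP0/OPP1 take ODM_MEM0..3, then
	 * borrow ODM_MEM5 and ODM_MEM7 -> bits 0,1,2,3,5,7 -> prints 0xaf. */
	printf("0x%x\n", example_odm_mem_bit_map(opp_id, 2, 11520));
	return 0;
}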
+ +#ifndef __DC_OPTC_DCN401_H__ +#define __DC_OPTC_DCN401_H__ + +#include "dcn10/dcn10_optc.h" + +#define OPTC_COMMON_MASK_SH_LIST_DCN401(mask_sh)\ + SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\ + SF(OTG0_OTG_VREADY_PARAM, VREADY_OFFSET, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_START_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_END_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_START_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_END_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_Y, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ + SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MIN_EN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, 
OTG_TRIGA_SOURCE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\ + SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\ + SF(OTG0_OTG_M_CONST_DTO0, OTG_M_CONST_DTO_PHASE, mask_sh),\ + SF(OTG0_OTG_M_CONST_DTO1, OTG_M_CONST_DTO_MODULO, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ + SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ + SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ + SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\ + 
SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ + SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ + SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG2_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG3_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ + SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ + SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL2, OPTC_SEGMENT_WIDTH_LAST, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\ + SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ + SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ + SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + +void dcn401_timing_generator_init(struct optc *optc1); + +void optc401_set_drr( + struct timing_generator *optc, + const struct drr_params *params); +void optc401_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max); +void optc401_setup_manual_trigger(struct timing_generator *optc); + +#endif /* __DC_OPTC_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c new file mode 100644 index 000000000000..ac93c8b9361b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -0,0 +1,2118 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
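A note on the register-list style used throughout this file: the _RI/_init macros defined below fill static register structs at runtime, with BASE_INNER(seg) resolving to ctx->dcn_reg_offsets[seg]. As a sketch of the token pasting, a single SRI_ARR() invocation expands roughly as shown in the comment below; the OTG_CONTROL/OTG/1 operands and the regOTG1_* symbols are illustrative assumptions based on the dcn_4_1_0_offset.h naming, and REG_STRUCT is assumed to have been redefined to optc_regs at the call site, as is done for aux_engine_regs and i2c_hw_regs later in this file.

/*
 *   SRI_ARR(OTG_CONTROL, OTG, 1)
 * pastes into
 *   optc_regs[1].OTG_CONTROL =
 *           ctx->dcn_reg_offsets[regOTG1_OTG_CONTROL_BASE_IDX] +
 *           regOTG1_OTG_CONTROL;
 */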
+ +#include "dm_services.h" +#include "dc.h" + +#include "dcn32/dcn32_init.h" +#include "dcn401/dcn401_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn401_resource.h" + +#include "dcn20/dcn20_resource.h" +#include "dcn30/dcn30_resource.h" +#include "dcn32/dcn32_resource.h" +#include "dcn321/dcn321_resource.h" + +#include "dcn10/dcn10_ipp.h" +#include "dcn401/dcn401_hubbub.h" +#include "dcn401/dcn401_mpc.h" +#include "dcn401/dcn401_hubp.h" +#include "irq/dcn401/irq_service_dcn401.h" +#include "dcn401/dcn401_dpp.h" +#include "dcn401/dcn401_optc.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn20/dcn20_opp.h" +#include "dcn401/dcn401_dsc.h" +#include "dcn30/dcn30_vpg.h" +#include "dcn31/dcn31_vpg.h" +#include "dcn30/dcn30_afmt.h" +#include "dcn30/dcn30_dio_stream_encoder.h" +#include "dcn401/dcn401_dio_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_link_encoder.h" +#include "dcn32/dcn32_hpo_dp_link_encoder.h" +#include "dcn31/dcn31_apg.h" +#include "dcn31/dcn31_dio_link_encoder.h" +#include "dcn401/dcn401_dio_link_encoder.h" +#include "dcn10/dcn10_link_encoder.h" +#include "dcn321/dcn321_dio_link_encoder.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "clk_mgr.h" +#include "virtual/virtual_stream_encoder.h" +#include "dml/display_mode_vba.h" +#include "dcn401/dcn401_dccg.h" +#include "dcn10/dcn10_resource.h" +#include "link.h" +#include "link_enc_cfg.h" +#include "dcn31/dcn31_panel_cntl.h" + +#include "dcn30/dcn30_dwb.h" +#include "dcn32/dcn32_mmhubbub.h" + +#include "dcn/dcn_4_1_0_offset.h" +#include "dcn/dcn_4_1_0_sh_mask.h" +#include "nbif/nbif_6_3_1_offset.h" + +#include "reg_helper.h" +#include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" + +#include "dml/dcn30/display_mode_vba_30.h" +#include "vm_helper.h" +#include "dcn20/dcn20_vmid.h" +#include "dml/dcn401/dcn401_fpu.h" + +#include "dc_state_priv.h" + +#include "dml2/dml2_wrapper.h" + +#define DC_LOGGER_INIT(logger) + +enum dcn401_clk_src_array_id { + DCN401_CLK_SRC_PLL0, + DCN401_CLK_SRC_PLL1, + DCN401_CLK_SRC_PLL2, + DCN401_CLK_SRC_PLL3, + //DCN401_CLK_SRC_PLL4, + DCN401_CLK_SRC_TOTAL +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file + */ + +/* DCN */ +#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + REG_STRUCT.reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name +#define SR_ARR(reg_name, id)\ + REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name +#define SR_ARR_INIT(reg_name, id, value)\ + REG_STRUCT[id].reg_name = value + +#define SRI(reg_name, block, id)\ + REG_STRUCT.reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI_ARR(reg_name, block, id)\ + REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +/* + * Used when a reg_name would otherwise begin with an integer + */ +#define SRI_ARR_US(reg_name, block, id)\ + REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## reg_name +#define SR_ARR_I2C(reg_name, id) \ + REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name + +#define 
SRI_ARR_I2C(reg_name, block, id)\ + REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI_ARR_ALPHABET(reg_name, block, index, id)\ + REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name +#define SRI2_ARR(reg_name, block, id)\ + REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_ARR_2(reg_name, block, id, inst)\ + REG_STRUCT[inst].reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_MPC_RMU(reg_name, block, id)\ + .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_DWB(reg_name, temp_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## temp_name + +#define DCCG_SRII(reg_name, block, id)\ + REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define VUPDATE_SRII(reg_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + reg ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) ctx->nbio_reg_offsets[seg] + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX0_ ## reg_name +#define NBIO_SR_ARR(reg_name, id)\ + REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX0_ ## reg_name + +#define CTX ctx +#define REG(reg_name) \ + (ctx->dcn_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) + +static struct bios_registers bios_regs; + +#define bios_regs_init() \ + NBIO_SR(BIOS_SCRATCH_3),\ + NBIO_SR(BIOS_SCRATCH_6) + +#define clk_src_regs_init(index, pllid)\ + CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid) + +static struct dce110_clk_src_regs clk_src_regs[5]; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN3_2(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN3_2(_MASK) +}; + +#define abm_regs_init(id)\ + ABM_DCN401_REG_LIST_RI(id) + +static struct dce_abm_registers abm_regs[4]; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN401(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN401(_MASK) +}; + +#define audio_regs_init(id)\ + AUD_COMMON_REG_LIST_RI(id) + +static struct dce_audio_registers audio_regs[5]; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + 
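The audio_shift/audio_mask pair that follows shows the pattern used for every shift/mask table in this file: the same field list is expanded twice, once with __SHIFT and once with _MASK. Assuming SF() pastes tokens the same way as the SF_DWB2() macro defined above (.field_name = reg_name##__##field_name##post_fix), the first DCE120_AUD_COMMON_MASK_SH_LIST entry yields:

/*
 * DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT):
 *   .AZALIA_ENDPOINT_REG_INDEX =
 *       AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_INDEX__SHIFT,
 *
 * DCE120_AUD_COMMON_MASK_SH_LIST(_MASK):
 *   .AZALIA_ENDPOINT_REG_INDEX =
 *       AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_INDEX_MASK,
 */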
+static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define vpg_regs_init(id)\ + VPG_DCN401_REG_LIST_RI(id) + +static struct dcn31_vpg_registers vpg_regs[9]; + +static const struct dcn31_vpg_shift vpg_shift = { + DCN31_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_vpg_mask vpg_mask = { + DCN31_VPG_MASK_SH_LIST(_MASK) +}; + +#define afmt_regs_init(id)\ + AFMT_DCN3_REG_LIST_RI(id) + +static struct dcn30_afmt_registers afmt_regs[5]; + +static const struct dcn30_afmt_shift afmt_shift = { + DCN3_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_afmt_mask afmt_mask = { + DCN3_AFMT_MASK_SH_LIST(_MASK) +}; + +#define apg_regs_init(id)\ + APG_DCN31_REG_LIST_RI(id) + +static struct dcn31_apg_registers apg_regs[4]; + +static const struct dcn31_apg_shift apg_shift = { + DCN31_APG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_apg_mask apg_mask = { + DCN31_APG_MASK_SH_LIST(_MASK) +}; + +#define stream_enc_regs_init(id)\ + SE_DCN4_01_REG_LIST_RI(id) + +static struct dcn10_stream_enc_registers stream_enc_regs[4]; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN401(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN401(_MASK) +}; + +#define aux_regs_init(id)\ + DCN2_AUX_REG_LIST_RI(id) + +static struct dcn10_link_enc_aux_registers link_enc_aux_regs[5]; + +#define hpd_regs_init(id)\ + HPD_REG_LIST_RI(id) + +static struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[5]; + +#define link_regs_init(id, phyid)\ + LE_DCN401_REG_LIST_RI(id) + +static struct dcn10_link_enc_registers link_enc_regs[4]; + + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN401(__SHIFT) +}; + + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN401(_MASK) +}; + + +#define hpo_dp_stream_encoder_reg_init(id)\ + DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id) + +static struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[4]; + +static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) +}; + + +#define hpo_dp_link_encoder_reg_init(id)\ + DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id) + /*DCN3_1_RDPCSTX_REG_LIST(0),*/ + /*DCN3_1_RDPCSTX_REG_LIST(1),*/ + /*DCN3_1_RDPCSTX_REG_LIST(2),*/ + /*DCN3_1_RDPCSTX_REG_LIST(3),*/ + +static struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[4]; + +static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { + DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { + DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) +}; + +#define dpp_regs_init(id)\ + DPP_REG_LIST_DCN401_COMMON_RI(id) + +static struct dcn401_dpp_registers dpp_regs[4]; + +static const struct dcn401_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN401_COMMON(__SHIFT) +}; + +static const struct dcn401_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN401_COMMON(_MASK) +}; + +#define opp_regs_init(id)\ + OPP_REG_LIST_DCN401_RI(id) + +static struct dcn20_opp_registers opp_regs[4]; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + 
OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +#define aux_engine_regs_init(id) \ + AUX_COMMON_REG_LIST0_RI(id), SR_ARR_INIT(AUXN_IMPCAL, id, 0), \ + SR_ARR_INIT(AUXP_IMPCAL, id, 0), \ + SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK), \ + SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK) + +static struct dce110_aux_registers aux_engine_regs[5]; + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +#define dwbc_regs_dcn401_init(id)\ + DWBC_COMMON_REG_LIST_DCN30_RI(id) + +static struct dcn30_dwbc_registers dwbc401_regs[1]; + +static const struct dcn30_dwbc_shift dwbc401_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_dwbc_mask dwbc401_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + + +#define mcif_wb_regs_dcn3_init(id)\ + MCIF_WB_COMMON_REG_LIST_DCN32_RI(id) + +static struct dcn30_mmhubbub_registers mcif_wb30_regs[1]; + +static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; + +#define dsc_regs_init(id)\ + DSC_REG_LIST_DCN401_RI(id) + +static struct dcn401_dsc_registers dsc_regs[4]; + +static const struct dcn401_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN401(__SHIFT) +}; + +static const struct dcn401_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN401(_MASK) +}; + +static struct dcn401_mpc_registers mpc_regs; + +#define dcn_mpc_regs_init()\ + MPC_REG_LIST_DCN4_01_RI(0),\ + MPC_REG_LIST_DCN4_01_RI(1),\ + MPC_REG_LIST_DCN4_01_RI(2),\ + MPC_REG_LIST_DCN4_01_RI(3),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\ + MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0) + +static const struct dcn401_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN4_01(__SHIFT) +}; + +static const struct dcn401_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN4_01(_MASK) +}; + +#define optc_regs_init(id)\ + OPTC_COMMON_REG_LIST_DCN401_RI(id) + +static struct dcn_optc_registers optc_regs[4]; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN401(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN401(_MASK) +}; + +#define hubp_regs_init(id)\ + HUBP_REG_LIST_DCN401_RI(id) + +static struct dcn_hubp2_registers hubp_regs[4]; + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN401(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN401(_MASK) +}; + +static struct dcn_hubbub_registers hubbub_reg; +#define hubbub_reg_init()\ + HUBBUB_REG_LIST_DCN4_01_RI(0) + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN4_01(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN4_01(_MASK) +}; + +static struct dccg_registers dccg_regs; + +#define dccg_regs_init()\ + DCCG_REG_LIST_DCN401_RI() + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN401(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN401(_MASK) +}; + +#define SRII2(reg_name_pre, reg_name_post, id)\ + .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ + ## id ## _ ## reg_name_post ## _BASE_IDX) + \ + reg ## reg_name_pre ## id ## _ 
## reg_name_post + + +#define HWSEQ_DCN401_REG_LIST()\ + SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DIO_MEM_PWR_CTRL), \ + SR(ODM_MEM_PWR_CTRL3), \ + SR(MMHUBBUB_MEM_PWR_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL2), \ + SR(DCFCLK_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ + SRII(PIXEL_RATE_CNTL, OTG, 0), \ + SRII(PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PIXEL_RATE_CNTL, OTG, 3),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ + SR(MICROSECOND_TIME_BASE_DIV), \ + SR(MILLISECOND_TIME_BASE_DIV), \ + SR(DISPCLK_FREQ_CHANGE_CNTL), \ + SR(RBBMIF_TIMEOUT_DIS), \ + SR(RBBMIF_TIMEOUT_DIS_2), \ + SR(DCHUBBUB_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ + SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ + SR(MPC_CRC_CTRL), \ + SR(MPC_CRC_RESULT_GB), \ + SR(MPC_CRC_RESULT_C), \ + SR(MPC_CRC_RESULT_AR), \ + SR(DOMAIN0_PG_CONFIG), \ + SR(DOMAIN1_PG_CONFIG), \ + SR(DOMAIN2_PG_CONFIG), \ + SR(DOMAIN3_PG_CONFIG), \ + SR(DOMAIN16_PG_CONFIG), \ + SR(DOMAIN17_PG_CONFIG), \ + SR(DOMAIN18_PG_CONFIG), \ + SR(DOMAIN19_PG_CONFIG), \ + SR(DOMAIN22_PG_CONFIG), \ + SR(DOMAIN23_PG_CONFIG), \ + SR(DOMAIN24_PG_CONFIG), \ + SR(DOMAIN25_PG_CONFIG), \ + SR(DOMAIN0_PG_STATUS), \ + SR(DOMAIN1_PG_STATUS), \ + SR(DOMAIN2_PG_STATUS), \ + SR(DOMAIN3_PG_STATUS), \ + SR(DOMAIN16_PG_STATUS), \ + SR(DOMAIN17_PG_STATUS), \ + SR(DOMAIN18_PG_STATUS), \ + SR(DOMAIN19_PG_STATUS), \ + SR(DOMAIN22_PG_STATUS), \ + SR(DOMAIN23_PG_STATUS), \ + SR(DOMAIN24_PG_STATUS), \ + SR(DOMAIN25_PG_STATUS), \ + SR(DC_IP_REQUEST_CNTL), \ + SR(AZALIA_AUDIO_DTO), \ + SR(HPO_TOP_HW_CONTROL),\ + SR(AZALIA_CONTROLLER_CLOCK_GATING) + +static struct dce_hwseq_registers hwseq_reg; + +#define hwseq_reg_init()\ + HWSEQ_DCN401_REG_LIST() + +#define HWSEQ_DCN401_MASK_SH_LIST(mask_sh)\ + HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ + HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + 
HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN22_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN23_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN24_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN25_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ + HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ + HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ + HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh) + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN401_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN401_MASK_SH_LIST(_MASK) +}; + +#define vmid_regs_init(id)\ + DCN20_VMID_REG_LIST_RI(id) + +static struct dcn_vmid_registers vmid_regs[16]; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static const struct resource_caps res_cap_dcn4_01 = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 4, + .num_stream_encoder = 4, + .num_hpo_dp_stream_encoder = 4, + .num_hpo_dp_link_encoder = 4, + .num_pll = 4, + .num_dwb = 1, + .num_ddc = 4, + .num_vmid = 16, + .num_mpc_3dlut = 4, + .num_dsc = 4, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + + // 6:1 downscaling ratio: 1000/6 = 166.666 + .max_downscale_factor = { + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = false, + .pipe_split_policy = MPC_SPLIT_AVOID, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 7680,/*upto 8K*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable, + .dmub_command_table = true, + .enable_mem_low_power = { + .bits = { + .vga = false, + .i2c = false, + .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled + .dscl = false, + .cm = false, + .mpc = false, + .optc = true, + } + }, + .use_max_lb = true, + .force_disable_subvp = false, + .exit_idle_opt_for_cursor_updates = true, + .using_dml2 = true, + .using_dml21 = true, + .enable_single_display_2to1_odm_policy = true, + + //must match 
enable_single_display_2to1_odm_policy to support dynamic ODM transitions + .enable_double_buffered_dsc_pg_support = true, + .enable_dp_dig_pixel_rate_div_policy = 1, + .allow_sw_cursor_fallback = false, + .alloc_extra_way_for_cursor = true, + .min_prefetch_in_strobe_ns = 60000, // 60us + .disable_unbounded_requesting = false, + .enable_legacy_fast_update = false, + .fams2_config = { + .bits = { + .enable = true, + .enable_offload_flip = true, + .enable_stall_recovery = true, + } + }, + .force_cositing = CHROMA_COSITING_TOPLEFT + 1, +}; + +static struct dce_aux *dcn401_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT aux_engine_regs + aux_engine_regs_init(0), + aux_engine_regs_init(1), + aux_engine_regs_init(2), + aux_engine_regs_init(3); + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs_init(id)\ + I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) + +static struct dce_i2c_registers i2c_hw_regs[5]; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN401(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN401(_MASK) +}; + +static struct dce_i2c_hw *dcn401_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT i2c_hw_regs + i2c_inst_regs_init(1), + i2c_inst_regs_init(2), + i2c_inst_regs_init(3), + i2c_inst_regs_init(4); + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} + +static struct clock_source *dcn401_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn401_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static struct hubbub *dcn401_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub2 = kzalloc(sizeof(struct dcn20_hubbub), + GFP_KERNEL); + + if (!hubbub2) + return NULL; + + +#undef REG_STRUCT +#define REG_STRUCT hubbub_reg + hubbub_reg_init(); + +#undef REG_STRUCT +#define REG_STRUCT vmid_regs + vmid_regs_init(0), + vmid_regs_init(1), + vmid_regs_init(2), + vmid_regs_init(3), + vmid_regs_init(4), + vmid_regs_init(5), + vmid_regs_init(6), + vmid_regs_init(7), + vmid_regs_init(8), + vmid_regs_init(9), + vmid_regs_init(10), + vmid_regs_init(11), + vmid_regs_init(12), + vmid_regs_init(13), + vmid_regs_init(14), + vmid_regs_init(15); + + hubbub401_construct(hubbub2, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask, + DCN4_01_DEFAULT_DET_SIZE, //nominal (default) detile buffer size in kbytes, + 8, //dml2 ip_params_st.pixel_chunk_size_kbytes + DCN4_01_CRB_SIZE_KB); //dml2 ip_params_st.config_return_buffer_size_in_kbytes + + for (i = 0; i < res_cap_dcn4_01.num_vmid; i++) 
{ + struct dcn20_vmid *vmid = &hubbub2->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub2->base; +} + +static struct hubp *dcn401_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_hubp *hubp2 = + kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); + + if (!hubp2) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT hubp_regs + hubp_regs_init(0), + hubp_regs_init(1), + hubp_regs_init(2), + hubp_regs_init(3); + + if (hubp401_construct(hubp2, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +static void dcn401_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN401_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn401_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn401_dpp *dpp401 = + kzalloc(sizeof(struct dcn401_dpp), GFP_KERNEL); + + if (!dpp401) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT dpp_regs + dpp_regs_init(0), + dpp_regs_init(1), + dpp_regs_init(2), + dpp_regs_init(3); + + if (dpp401_construct(dpp401, ctx, inst, + &dpp_regs[inst], &tf_shift, &tf_mask)) + return &dpp401->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp401); + return NULL; +} + +static struct mpc *dcn401_mpc_create( + struct dc_context *ctx, + int num_mpcc, + int num_rmu) +{ + struct dcn401_mpc *mpc401 = kzalloc(sizeof(struct dcn401_mpc), + GFP_KERNEL); + + if (!mpc401) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT mpc_regs + dcn_mpc_regs_init(); + + dcn401_mpc_construct(mpc401, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + num_mpcc, + num_rmu); + + return &mpc401->base; +} + +static struct output_pixel_processor *dcn401_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp4 = + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp4) { + BREAK_TO_DEBUGGER(); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT opp_regs + opp_regs_init(0), + opp_regs_init(1), + opp_regs_init(2), + opp_regs_init(3); + + dcn20_opp_construct(opp4, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp4->base; +} + + +static struct timing_generator *dcn401_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; +#undef REG_STRUCT +#define REG_STRUCT optc_regs + optc_regs_init(0), + optc_regs_init(1), + optc_regs_init(2), + optc_regs_init(3); + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn401_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn401_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT link_enc_aux_regs + aux_regs_init(0), + aux_regs_init(1), + 
aux_regs_init(2), + aux_regs_init(3); + +#undef REG_STRUCT +#define REG_STRUCT link_enc_hpd_regs + hpd_regs_init(0), + hpd_regs_init(1), + hpd_regs_init(2), + hpd_regs_init(3); +#undef REG_STRUCT +#define REG_STRUCT link_enc_regs + link_regs_init(0, A), + link_regs_init(1, B), + link_regs_init(2, C), + link_regs_init(3, D); + + dcn401_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + return &enc20->enc10.base; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, ctx->dcn_reg_offsets[regDC_PINSTRAPS_BASE_IDX] + regDC_PINSTRAPS, + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); + +} + +static struct audio *dcn401_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + +#undef REG_STRUCT +#define REG_STRUCT audio_regs + audio_regs_init(0), + audio_regs_init(1), + audio_regs_init(2), + audio_regs_init(3), + audio_regs_init(4); + + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct vpg *dcn401_vpg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_vpg *vpg4 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL); + + if (!vpg4) + return NULL; + + +#undef REG_STRUCT +#define REG_STRUCT vpg_regs + vpg_regs_init(0), + vpg_regs_init(1), + vpg_regs_init(2), + vpg_regs_init(3), + vpg_regs_init(4), + vpg_regs_init(5), + vpg_regs_init(6), + vpg_regs_init(7), + vpg_regs_init(8); + + vpg31_construct(vpg4, ctx, inst, + &vpg_regs[inst], + &vpg_shift, + &vpg_mask); + + return &vpg4->base; +} + +static struct afmt *dcn401_afmt_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn30_afmt *afmt401 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); + + if (!afmt401) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT afmt_regs + afmt_regs_init(0), + afmt_regs_init(1), + afmt_regs_init(2), + afmt_regs_init(3), + afmt_regs_init(4); + + afmt3_construct(afmt401, ctx, inst, + &afmt_regs[inst], + &afmt_shift, + &afmt_mask); + + return &afmt401->base; +} + +static struct apg *dcn401_apg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); + + if (!apg31) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT apg_regs + apg_regs_init(0), + apg_regs_init(1), + apg_regs_init(2), + apg_regs_init(3); + + apg31_construct(apg31, ctx, inst, + &apg_regs[inst], + &apg_shift, + &apg_mask); + + return &apg31->base; +} + +static struct stream_encoder *dcn401_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id <= ENGINE_ID_DIGF) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn401_vpg_create(ctx, vpg_inst); + afmt = dcn401_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } +#undef REG_STRUCT +#define REG_STRUCT stream_enc_regs + stream_enc_regs_init(0), + stream_enc_regs_init(1), + stream_enc_regs_init(2), + stream_enc_regs_init(3); + //stream_enc_regs_init(4); + + dcn401_dio_stream_encoder_construct(enc1, 
ctx, ctx->dc_bios, + eng_id, vpg, afmt, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + return &enc1->base; +} + +static struct hpo_dp_stream_encoder *dcn401_hpo_dp_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; + struct vpg *vpg; + struct apg *apg; + uint32_t hpo_dp_inst; + uint32_t vpg_inst; + uint32_t apg_inst; + + ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); + hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; + + /* Mapping of VPG register blocks to HPO DP block instance: + * VPG[6] -> HPO_DP[0] + * VPG[7] -> HPO_DP[1] + * VPG[8] -> HPO_DP[2] + * VPG[9] -> HPO_DP[3] + */ + vpg_inst = hpo_dp_inst + 5; + + /* Mapping of APG register blocks to HPO DP block instance: + * APG[0] -> HPO_DP[0] + * APG[1] -> HPO_DP[1] + * APG[2] -> HPO_DP[2] + * APG[3] -> HPO_DP[3] + */ + apg_inst = hpo_dp_inst; + + /* allocate HPO stream encoder and create VPG sub-block */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); + vpg = dcn401_vpg_create(ctx, vpg_inst); + apg = dcn401_apg_create(ctx, apg_inst); + + if (!hpo_dp_enc31 || !vpg || !apg) { + kfree(hpo_dp_enc31); + kfree(vpg); + kfree(apg); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT hpo_dp_stream_enc_regs + hpo_dp_stream_encoder_reg_init(0), + hpo_dp_stream_encoder_reg_init(1), + hpo_dp_stream_encoder_reg_init(2), + hpo_dp_stream_encoder_reg_init(3); + + dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, + hpo_dp_inst, eng_id, vpg, apg, + &hpo_dp_stream_enc_regs[hpo_dp_inst], + &hpo_dp_se_shift, &hpo_dp_se_mask); + + return &hpo_dp_enc31->base; +} + +static struct hpo_dp_link_encoder *dcn401_hpo_dp_link_encoder_create( + uint8_t inst, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; + + /* allocate HPO link encoder */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); + +#undef REG_STRUCT +#define REG_STRUCT hpo_dp_link_enc_regs + hpo_dp_link_encoder_reg_init(0), + hpo_dp_link_encoder_reg_init(1), + hpo_dp_link_encoder_reg_init(2), + hpo_dp_link_encoder_reg_init(3); + + hpo_dp_link_encoder32_construct(hpo_dp_enc31, ctx, inst, + &hpo_dp_link_enc_regs[inst], + &hpo_dp_le_shift, &hpo_dp_le_mask); + + return &hpo_dp_enc31->base; +} + +static struct dce_hwseq *dcn401_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + +#undef REG_STRUCT +#define REG_STRUCT hwseq_reg + hwseq_reg_init(); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + + return hws; +} +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn401_create_audio, + .create_stream_encoder = dcn401_stream_encoder_create, + .create_hpo_dp_stream_encoder = dcn401_hpo_dp_stream_encoder_create, + .create_hpo_dp_link_encoder = dcn401_hpo_dp_link_encoder_create, + .create_hwseq = dcn401_hwseq_create, +}; + +static void dcn401_dsc_destroy(struct display_stream_compressor **dsc) +{ + kfree(container_of(*dsc, struct dcn401_dsc, base)); + *dsc = NULL; +} + +static void dcn401_resource_destruct(struct dcn401_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + if (pool->base.stream_enc[i]->vpg != NULL) { + kfree(DCN31_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); + 
pool->base.stream_enc[i]->vpg = NULL; + } + if (pool->base.stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); + pool->base.stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { + if (pool->base.hpo_dp_stream_enc[i] != NULL) { + if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { + kfree(DCN31_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); + pool->base.hpo_dp_stream_enc[i]->vpg = NULL; + } + if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { + kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); + pool->base.hpo_dp_stream_enc[i]->apg = NULL; + } + kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); + pool->base.hpo_dp_stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { + if (pool->base.hpo_dp_link_enc[i] != NULL) { + kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); + pool->base.hpo_dp_link_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + dcn401_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(TO_DCN20_HUBBUB(pool->base.hubbub)); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn401_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { + if (pool->base.mpc_lut[i] != NULL) { + dc_3dlut_func_release(pool->base.mpc_lut[i]); + pool->base.mpc_lut[i] = NULL; + } + if (pool->base.mpc_shaper[i] != NULL) { + 
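/* release the shaper transfer function paired with this MPC 3D LUT */ +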
dc_transfer_func_release(pool->base.mpc_shaper[i]); + pool->base.mpc_shaper[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.multiple_abms[i] != NULL) + dce_abm_destroy(&pool->base.multiple_abms[i]); + } + + if (pool->base.psr != NULL) + dmub_psr_destroy(&pool->base.psr); + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); + + if (pool->base.oem_device != NULL) { + struct dc *dc = pool->base.oem_device->ctx->dc; + + dc->link_srv->destroy_ddc_service(&pool->base.oem_device); + } +} + + +static bool dcn401_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t dwb_count = pool->res_cap->num_dwb; + + for (i = 0; i < dwb_count; i++) { + struct dcn30_dwbc *dwbc401 = kzalloc(sizeof(struct dcn30_dwbc), + GFP_KERNEL); + + if (!dwbc401) { + dm_error("DC: failed to create dwbc401!\n"); + return false; + } + + +#undef REG_STRUCT +#define REG_STRUCT dwbc401_regs + dwbc_regs_dcn401_init(0); + + dcn30_dwbc_construct(dwbc401, ctx, + &dwbc401_regs[i], + &dwbc401_shift, + &dwbc401_mask, + i); + + pool->dwbc[i] = &dwbc401->base; + + } + return true; +} + +static bool dcn401_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t dwb_count = pool->res_cap->num_dwb; + + for (i = 0; i < dwb_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), + GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + +#undef REG_STRUCT +#define REG_STRUCT mcif_wb30_regs + mcif_wb_regs_dcn3_init(0); + + dcn32_mmhubbub_construct(mcif_wb30, ctx, + &mcif_wb30_regs[i], + &mcif_wb30_shift, + &mcif_wb30_mask, + i); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +static struct display_stream_compressor *dcn401_dsc_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn401_dsc *dsc = + kzalloc(sizeof(struct dcn401_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT dsc_regs + dsc_regs_init(0), + dsc_regs_init(1), + dsc_regs_init(2), + dsc_regs_init(3); + + dsc401_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + dsc401_set_fgcg(dsc, + ctx->dc->debug.enable_fine_grain_clock_gating.bits.dsc); + + //dsc->max_image_width = 6016; + dsc->max_image_width = 5760; + + return &dsc->base; +} + +static void dcn401_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn401_resource_pool *dcn401_pool = TO_DCN401_RES_POOL(*pool); + + dcn401_resource_destruct(dcn401_pool); + kfree(dcn401_pool); + *pool = NULL; +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + +static void dcn401_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct dml2_configuration_options dml2_opt = dc->dml2_options; + + DC_FP_START(); + + dcn401_update_bw_bounding_box_fpu(dc, bw_params); + + dml2_opt.use_clock_dc_limits = false; + if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2) + dml2_reinit(dc, &dml2_opt, &dc->current_state->bw_ctx.dml2); + + dml2_opt.use_clock_dc_limits = true; + if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2_dc_power_source) + dml2_reinit(dc, &dml2_opt, &dc->current_state->bw_ctx.dml2_dc_power_source); + + DC_FP_END(); 
+} + +enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_state) +{ + plane_state->tiling_info.gfx_addr3.swizzle = DC_ADDR3_SW_64KB_2D; + return DC_OK; +} + +bool dcn401_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool out = false; + if (dc->debug.using_dml2) + out = dml2_validate(dc, context, + context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, + fast_validate); + return out; +} + +static void dcn401_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx) +{ + const struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->link; + struct link_encoder *link_enc = NULL; + struct pixel_clk_params *pixel_clk_params = &pipe_ctx->stream_res.pix_clk_params; + + pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; + + link_enc = link_enc_cfg_get_link_enc(link); + if (link_enc) + pixel_clk_params->encoder_object_id = link_enc->id; + + pixel_clk_params->signal_type = pipe_ctx->stream->signal; + pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; + /* TODO: un-hardcode*/ + + /* TODO - DP2.0 HW: calculate requested_sym_clk for UHBR rates */ + + pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * + LINK_RATE_REF_FREQ_IN_KHZ; + pixel_clk_params->flags.ENABLE_SS = 0; + pixel_clk_params->color_depth = + stream->timing.display_color_depth; + pixel_clk_params->flags.DISPLAY_BLANKED = 1; + pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; + + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) + pixel_clk_params->color_depth = COLOR_DEPTH_888; + + /* TODO: Do we still need this? */ + if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + pixel_clk_params->requested_pix_clk_100hz *= 2; + if (dc_is_tmds_signal(stream->signal) && + stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + pixel_clk_params->requested_pix_clk_100hz /= 2; + + pipe_ctx->clock_source->funcs->get_pix_clk_dividers( + pipe_ctx->clock_source, + &pipe_ctx->stream_res.pix_clk_params, + &pipe_ctx->pll_settings); +} + +static struct resource_funcs dcn401_res_pool_funcs = { + .destroy = dcn401_destroy_resource_pool, + .link_enc_create = dcn401_link_encoder_create, + .link_enc_create_minimal = NULL, + .panel_cntl_create = dcn32_panel_cntl_create, + .validate_bandwidth = dcn401_validate_bandwidth, + .calculate_wm_and_dlg = NULL, + .populate_dml_pipes = NULL, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn32_acquire_free_pipe_as_secondary_dpp_pipe, + .acquire_free_pipe_as_secondary_opp_head = dcn32_acquire_free_pipe_as_secondary_opp_head, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn30_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn32_acquire_post_bldn_3dlut, + .release_post_bldn_3dlut = dcn32_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn401_update_bw_bounding_box, + .patch_unknown_plane_state = dcn401_patch_unknown_plane_state, + .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, + .add_phantom_pipes = dcn32_add_phantom_pipes, + .build_pipe_pix_clk_params = dcn401_build_pipe_pix_clk_params, + .calculate_mall_ways_from_bytes = 
dcn32_calculate_mall_ways_from_bytes, +}; + +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* DCN401 support max 4 pipes */ + value = value & 0xf; + return value; +} + + +static bool dcn401_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn401_resource_pool *pool) +{ + int i, j; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + struct ddc_service_init_data ddc_init_data = {0}; + uint32_t pipe_fuses = 0; + uint32_t num_pipes = 4; + +#undef REG_STRUCT +#define REG_STRUCT bios_regs + bios_regs_init(); + +#undef REG_STRUCT +#define REG_STRUCT clk_src_regs + clk_src_regs_init(0, A), + clk_src_regs_init(1, B), + clk_src_regs_init(2, C), + clk_src_regs_init(3, D); + +#undef REG_STRUCT +#define REG_STRUCT abm_regs + abm_regs_init(0), + abm_regs_init(1), + abm_regs_init(2), + abm_regs_init(3); + +#undef REG_STRUCT +#define REG_STRUCT dccg_regs + dccg_regs_init(); + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_dcn4_01; + + /* max number of pipes for ASIC before checking for pipe fuses */ + num_pipes = pool->base.res_cap->num_timing_generator; + pipe_fuses = read_pipe_fuses(ctx); + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) + if (pipe_fuses & 1 << i) + num_pipes--; + + if (pipe_fuses & 1) + ASSERT(0); //Unexpected - Pipe 0 should always be fully functional! + + if (pipe_fuses & CC_DC_PIPE_DIS__DC_FULL_DIS_MASK) + ASSERT(0); //Entire DCN is harvested! + + pool->base.funcs = &dcn401_res_pool_funcs; + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.timing_generator_count = num_pipes; + pool->base.pipe_count = num_pipes; + pool->base.mpcc_count = num_pipes; + dc->caps.max_downscale_ratio = 600; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ + /* TODO: Bring max cursor size back to 256 after subvp cursor corruption is fixed*/ + dc->caps.max_cursor_size = 64; + dc->caps.cursor_not_scaled = true; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + dc->caps.mall_size_per_mem_channel = 4; + /* total size = mall per channel * num channels * 1024 * 1024 */ + dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576; + dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; + dc->caps.cache_line_size = 64; + dc->caps.cache_num_ways = 16; + + /* Calculate the available MALL space */ + dc->caps.max_cab_allocation_bytes = dcn32_calc_num_avail_chans_for_mall( + dc, dc->ctx->dc_bios->vram_info.num_chans) * + dc->caps.mall_size_per_mem_channel * 1024 * 1024; + dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes; + + dc->caps.subvp_fw_processing_delay_us = 15; + dc->caps.subvp_drr_max_vblank_margin_us = 40; + dc->caps.subvp_prefetch_end_to_mall_start_us = 15; + dc->caps.subvp_swath_height_margin_lines = 16; + dc->caps.subvp_pstate_allow_width_us = 20; + dc->caps.subvp_vertical_int_margin_us = 30; + dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin + + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + dc->caps.dp_hpo = true; + dc->caps.dp_hdmi21_pcon_support 
= true; + dc->caps.edp_dsc_support = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN2 and later ASICs + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //4, configurable to be before or after BLND in MPCC + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + dc->config.use_spl = true; + dc->config.dc_mode_clk_limit_support = true; + dc->config.enable_windowed_mpo_odm = true; + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + /* interop bit is implicit */ + { + dc->caps.vbios_lttpr_aware = true; + } + } + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->base.clock_sources[DCN401_CLK_SRC_PLL0] = + dcn401_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN401_CLK_SRC_PLL1] = + dcn401_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN401_CLK_SRC_PLL2] = + dcn401_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN401_CLK_SRC_PLL3] = + dcn401_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + // pool->base.clock_sources[DCN401_CLK_SRC_PLL4] = + // dcn401_clock_source_create(ctx, ctx->dc_bios, + // CLOCK_SOURCE_COMBO_PHY_PLL4, + // &clk_src_regs[4], false); + + pool->base.clk_src_count = DCN401_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn401_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create 
clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* DCCG */ + pool->base.dccg = dccg401_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* IRQ Service */ + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn401_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + /* HUBBUB */ + pool->base.hubbub = dcn401_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + /* HUBPs, DPPs, OPPs, TGs, ABMs */ + for (i = 0, j = 0; i < pool->base.res_cap->num_timing_generator; i++) { + + /* if pipe is disabled, skip instance of HW pipe, + * i.e, skip ASIC register instance + */ + if (pipe_fuses & 1 << i) + continue; + + pool->base.hubps[j] = dcn401_hubp_create(ctx, i); + if (pool->base.hubps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create hubps!\n"); + goto create_fail; + } + + pool->base.dpps[j] = dcn401_dpp_create(ctx, i); + if (pool->base.dpps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + + pool->base.opps[j] = dcn401_opp_create(ctx, i); + if (pool->base.opps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + + pool->base.timing_generators[j] = dcn401_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + + pool->base.multiple_abms[j] = dmub_abm_create(ctx, + &abm_regs[i], + &abm_shift, + &abm_mask); + if (pool->base.multiple_abms[j] == NULL) { + dm_error("DC: failed to create abm for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* index for resource pool arrays for next valid pipe */ + j++; + } + + /* PSR */ + pool->base.psr = dmub_psr_create(ctx); + if (pool->base.psr == NULL) { + dm_error("DC: failed to create psr obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* MPCCs */ + pool->base.mpc = dcn401_mpc_create(ctx, pool->base.res_cap->num_timing_generator, pool->base.res_cap->num_mpc_3dlut); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + /* DSCs */ + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + pool->base.dscs[i] = dcn401_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB */ + if (!dcn401_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + /* MMHUBBUB */ + if (!dcn401_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn401_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn401_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + /* Audio, HWSeq, 
Stream Encoders including HPO and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer init functions and Plane caps */ + dcn401_hw_sequencer_init_functions(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { + ddc_init_data.ctx = dc->ctx; + ddc_init_data.link = NULL; + ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; + ddc_init_data.id.enum_id = 0; + ddc_init_data.id.type = OBJECT_TYPE_GENERIC; + pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); + } else { + pool->base.oem_device = NULL; + } + + //For now enable SDPIF_REQUEST_RATE_LIMIT on DCN4_01 when vram_info.num_chans provided + if (dc->config.sdpif_request_limit_words_per_umc == 0) + dc->config.sdpif_request_limit_words_per_umc = 16; + + dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; + dc->dml2_options.use_native_pstate_optimization = false; + dc->dml2_options.use_native_soc_bb_construction = true; + dc->dml2_options.minimize_dispclk_using_odm = true; + dc->dml2_options.map_dc_pipes_with_callbacks = true; + + resource_init_common_dml2_callbacks(dc, &dc->dml2_options); + dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; + dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc; + dc->dml2_options.svp_pstate.callbacks.calculate_mall_ways_from_bytes = pool->base.funcs->calculate_mall_ways_from_bytes; + + dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us; + dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us; + dc->dml2_options.svp_pstate.subvp_pstate_allow_width_us = dc->caps.subvp_pstate_allow_width_us; + dc->dml2_options.svp_pstate.subvp_swath_height_margin_lines = dc->caps.subvp_swath_height_margin_lines; + + dc->dml2_options.svp_pstate.force_disable_subvp = dc->debug.force_disable_subvp; + dc->dml2_options.svp_pstate.force_enable_subvp = dc->debug.force_subvp_mclk_switch; + + dc->dml2_options.mall_cfg.cache_line_size_bytes = dc->caps.cache_line_size; + dc->dml2_options.mall_cfg.cache_num_ways = dc->caps.cache_num_ways; + dc->dml2_options.mall_cfg.max_cab_allocation_bytes = dc->caps.max_cab_allocation_bytes; + dc->dml2_options.mall_cfg.mblk_height_4bpe_pixels = DCN3_2_MBLK_HEIGHT_4BPE; + dc->dml2_options.mall_cfg.mblk_height_8bpe_pixels = DCN3_2_MBLK_HEIGHT_8BPE; + dc->dml2_options.mall_cfg.mblk_size_bytes = DCN3_2_MALL_MBLK_SIZE_BYTES; + dc->dml2_options.mall_cfg.mblk_width_pixels = DCN3_2_MBLK_WIDTH; + + dc->dml2_options.max_segments_per_hubp = 20; + dc->dml2_options.det_segment_size = DCN4_01_CRB_SEGMENT_SIZE_KB; + + return true; + +create_fail: + + dcn401_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn401_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn401_resource_pool *pool = + kzalloc(sizeof(struct dcn401_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn401_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h 
b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h new file mode 100644 index 000000000000..d4dce2b4b6c1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -0,0 +1,581 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef _DCN401_RESOURCE_H_ +#define _DCN401_RESOURCE_H_ + +#include "core_types.h" +#include "dcn32/dcn32_resource.h" +#include "dcn401/dcn401_hubp.h" + +#define TO_DCN401_RES_POOL(pool)\ + container_of(pool, struct dcn401_resource_pool, base) + +struct dcn401_resource_pool { + struct resource_pool base; +}; + +struct resource_pool *dcn401_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_state); + +bool dcn401_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate); + +/* Following are definitions for run time init of reg offsets */ + +/* HUBP */ +#define HUBP_REG_LIST_DCN401_RI(id) \ + SRI_ARR(NOM_PARAMETERS_0, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_1, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_2, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_3, HUBPREQ, id), \ + SRI_ARR(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id), \ + SRI_ARR(DCHUBP_CNTL, HUBP, id), \ + SRI_ARR(HUBPREQ_DEBUG_DB, HUBP, id), \ + SRI_ARR(HUBPREQ_DEBUG, HUBP, id), \ + SRI_ARR(DCSURF_ADDR_CONFIG, HUBP, id), \ + SRI_ARR(DCSURF_TILING_CONFIG, HUBP, id), \ + SRI_ARR(DCSURF_SURFACE_PITCH, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_PITCH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_CONFIG, HUBP, id), \ + SRI_ARR(DCSURF_FLIP_CONTROL, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRI_VIEWPORT_DIMENSION, HUBP, id), \ + SRI_ARR(DCSURF_PRI_VIEWPORT_START, HUBP, id), \ + SRI_ARR(DCSURF_SEC_VIEWPORT_DIMENSION, HUBP, id), \ + SRI_ARR(DCSURF_SEC_VIEWPORT_START, HUBP, id), \ + SRI_ARR(DCSURF_PRI_VIEWPORT_DIMENSION_C, HUBP, id), \ + SRI_ARR(DCSURF_PRI_VIEWPORT_START_C, HUBP, id), \ + SRI_ARR(DCSURF_SEC_VIEWPORT_DIMENSION_C, HUBP, id), \ + SRI_ARR(DCSURF_SEC_VIEWPORT_START_C, HUBP, id), \ + SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_INUSE, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_INUSE_HIGH, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_INUSE_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_INUSE_HIGH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_HIGH, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_CONTROL, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_FLIP_INTERRUPT, HUBPREQ, id), \ + SRI_ARR(HUBPRET_CONTROL, HUBPRET, id), \ + SRI_ARR(HUBPRET_READ_LINE_STATUS, HUBPRET, id), \ + SRI_ARR(DCN_EXPANSION_MODE, HUBPREQ, id), \ + SRI_ARR(DCHUBP_REQ_SIZE_CONFIG, HUBP, id), \ + SRI_ARR(DCHUBP_REQ_SIZE_CONFIG_C, HUBP, id), \ + SRI_ARR(BLANK_OFFSET_0, HUBPREQ, id), \ + SRI_ARR(BLANK_OFFSET_1, HUBPREQ, id), \ + SRI_ARR(DST_DIMENSIONS, HUBPREQ, id), \ + SRI_ARR(DST_AFTER_SCALER, HUBPREQ, id), \ + 
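/* DLG (vblank/nominal request timing) and TTU/QoS watermark registers */ \ +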
SRI_ARR(VBLANK_PARAMETERS_0, HUBPREQ, id), \ + SRI_ARR(REF_FREQ_TO_PIX_FREQ, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_1, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_3, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_4, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_5, HUBPREQ, id), \ + SRI_ARR(PER_LINE_DELIVERY_PRE, HUBPREQ, id), \ + SRI_ARR(PER_LINE_DELIVERY, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_2, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_4, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_6, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_7, HUBPREQ, id), \ + SRI_ARR(DCN_TTU_QOS_WM, HUBPREQ, id), \ + SRI_ARR(DCN_GLOBAL_TTU_CNTL, HUBPREQ, id), \ + SRI_ARR(DCN_SURF0_TTU_CNTL0, HUBPREQ, id), \ + SRI_ARR(DCN_SURF0_TTU_CNTL1, HUBPREQ, id), \ + SRI_ARR(DCN_SURF1_TTU_CNTL0, HUBPREQ, id), \ + SRI_ARR(DCN_SURF1_TTU_CNTL1, HUBPREQ, id), \ + SRI_ARR(DCN_CUR0_TTU_CNTL0, HUBPREQ, id), \ + SRI_ARR(DCN_CUR0_TTU_CNTL1, HUBPREQ, id), \ + SRI_ARR(HUBP_CLK_CNTL, HUBP, id), \ + SRI_ARR(PREFETCH_SETTINGS, HUBPREQ, id), \ + SRI_ARR(PREFETCH_SETTINGS_C, HUBPREQ, id), \ + SRI_ARR(DCN_VM_SYSTEM_APERTURE_LOW_ADDR, HUBPREQ, id), \ + SRI_ARR(DCN_VM_SYSTEM_APERTURE_HIGH_ADDR, HUBPREQ, id), \ + SRI_ARR(CURSOR_SETTINGS, HUBPREQ, id), \ + SRI_ARR(CURSOR_SURFACE_ADDRESS_HIGH, CURSOR0_, id), \ + SRI_ARR(CURSOR_SURFACE_ADDRESS, CURSOR0_, id), \ + SRI_ARR(CURSOR_SIZE, CURSOR0_, id), \ + SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \ + SRI_ARR(CURSOR_POSITION, CURSOR0_, id), \ + SRI_ARR(CURSOR_HOT_SPOT, CURSOR0_, id), \ + SRI_ARR(CURSOR_DST_OFFSET, CURSOR0_, id), \ + SRI_ARR(DMDATA_ADDRESS_HIGH, CURSOR0_, id), \ + SRI_ARR(DMDATA_ADDRESS_LOW, CURSOR0_, id), \ + SRI_ARR(DMDATA_CNTL, CURSOR0_, id), \ + SRI_ARR(DMDATA_SW_CNTL, CURSOR0_, id), \ + SRI_ARR(DMDATA_QOS_CNTL, CURSOR0_, id), \ + SRI_ARR(DMDATA_SW_DATA, CURSOR0_, id), \ + SRI_ARR(DMDATA_STATUS, CURSOR0_, id), \ + SRI_ARR(FLIP_PARAMETERS_0, HUBPREQ, id), \ + SRI_ARR(FLIP_PARAMETERS_1, HUBPREQ, id), \ + SRI_ARR(FLIP_PARAMETERS_2, HUBPREQ, id), \ + SRI_ARR(DCN_CUR1_TTU_CNTL0, HUBPREQ, id), \ + SRI_ARR(DCN_CUR1_TTU_CNTL1, HUBPREQ, id), \ + SRI_ARR(DCSURF_FLIP_CONTROL2, HUBPREQ, id), \ + SRI_ARR(VMID_SETTINGS_0, HUBPREQ, id), \ + SRI_ARR(FLIP_PARAMETERS_3, HUBPREQ, id), \ + SRI_ARR(FLIP_PARAMETERS_4, HUBPREQ, id), \ + SRI_ARR(FLIP_PARAMETERS_5, HUBPREQ, id), \ + SRI_ARR(FLIP_PARAMETERS_6, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_5, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_6, HUBPREQ, id), \ + SRI_ARR(DCN_DMDATA_VM_CNTL, HUBPREQ, id), \ + SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ + SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ + SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \ + HUBP_3DLUT_FL_REG_LIST_DCN401(id) + +/* ABM */ +#define ABM_DCN401_REG_LIST_RI(id) \ + SRI_ARR(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \ + SRI_ARR(DC_ABM1_LS_SAMPLE_RATE, ABM, id), \ + SRI_ARR(DC_ABM1_HG_MISC_CTRL, ABM, id), \ + SRI_ARR(DC_ABM1_IPCSC_COEFF_SEL, ABM, id), \ + SRI_ARR(BL1_PWM_BL_UPDATE_SAMPLE_RATE, ABM, id), \ + SRI_ARR(BL1_PWM_CURRENT_ABM_LEVEL, ABM, id), \ + SRI_ARR(BL1_PWM_TARGET_ABM_LEVEL, ABM, id), \ + SRI_ARR(BL1_PWM_USER_LEVEL, ABM, id), \ + SRI_ARR(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM, id), \ + SRI_ARR(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \ + SRI_ARR(DC_ABM1_HG_BIN_33_40_SHIFT_INDEX, ABM, id), \ + SRI_ARR(DC_ABM1_HG_BIN_33_64_SHIFT_FLAG, ABM, id), \ + SRI_ARR(DC_ABM1_HG_BIN_41_48_SHIFT_INDEX, ABM, id), \ + SRI_ARR(DC_ABM1_HG_BIN_49_56_SHIFT_INDEX, ABM, id), \ + SRI_ARR(DC_ABM1_HG_BIN_57_64_SHIFT_INDEX, ABM, id), \ + SRI_ARR(DC_ABM1_HG_RESULT_DATA, ABM, id), \ + SRI_ARR(DC_ABM1_HG_RESULT_INDEX, ABM, 
id), \ + SRI_ARR(DC_ABM1_ACE_OFFSET_SLOPE_DATA, ABM, id), \ + SRI_ARR(DC_ABM1_ACE_PWL_CNTL, ABM, id), \ + SRI_ARR(DC_ABM1_ACE_THRES_DATA, ABM, id), \ + NBIO_SR_ARR(BIOS_SCRATCH_2, id) + +/* VPG */ +#define VPG_DCN401_REG_LIST_RI(id) \ + VPG_DCN3_REG_LIST_RI(id), \ + SRI_ARR(VPG_MEM_PWR, VPG, id) + +/* Stream encoder */ +#define SE_DCN4_01_REG_LIST_RI(id) \ + SRI_ARR(AFMT_CNTL, DIG, id), SRI_ARR(DIG_FE_CNTL, DIG, id), \ + SRI_ARR(HDMI_CONTROL, DIG, id), SRI_ARR(HDMI_DB_CONTROL, DIG, id), \ + SRI_ARR(HDMI_GC, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL4, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL5, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL6, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL7, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL8, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL9, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL10, DIG, id), \ + SRI_ARR(HDMI_INFOFRAME_CONTROL0, DIG, id), \ + SRI_ARR(HDMI_INFOFRAME_CONTROL1, DIG, id), \ + SRI_ARR(HDMI_VBI_PACKET_CONTROL, DIG, id), \ + SRI_ARR(HDMI_AUDIO_PACKET_CONTROL, DIG, id), \ + SRI_ARR(HDMI_ACR_PACKET_CONTROL, DIG, id), \ + SRI_ARR(HDMI_ACR_32_0, DIG, id), SRI_ARR(HDMI_ACR_32_1, DIG, id), \ + SRI_ARR(HDMI_ACR_44_0, DIG, id), SRI_ARR(HDMI_ACR_44_1, DIG, id), \ + SRI_ARR(HDMI_ACR_48_0, DIG, id), SRI_ARR(HDMI_ACR_48_1, DIG, id), \ + SRI_ARR(DP_DB_CNTL, DP, id), SRI_ARR(DP_MSA_MISC, DP, id), \ + SRI_ARR(DP_MSA_VBID_MISC, DP, id), SRI_ARR(DP_MSA_COLORIMETRY, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM1, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM2, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM3, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM4, DP, id), \ + SRI_ARR(DP_MSE_RATE_CNTL, DP, id), SRI_ARR(DP_MSE_RATE_UPDATE, DP, id), \ + SRI_ARR(DP_PIXEL_FORMAT, DP, id), SRI_ARR(DP_SEC_CNTL, DP, id), \ + SRI_ARR(DP_SEC_CNTL1, DP, id), SRI_ARR(DP_SEC_CNTL2, DP, id), \ + SRI_ARR(DP_SEC_CNTL5, DP, id), SRI_ARR(DP_SEC_CNTL6, DP, id), \ + SRI_ARR(DP_STEER_FIFO, DP, id), SRI_ARR(DP_VID_M, DP, id), \ + SRI_ARR(DP_VID_N, DP, id), SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \ + SRI_ARR(DP_VID_TIMING, DP, id), SRI_ARR(DP_SEC_AUD_N, DP, id), \ + SRI_ARR(DP_SEC_TIMESTAMP, DP, id), \ + SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \ + SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \ + SRI_ARR(DP_SEC_FRAMING4, DP, id), SRI_ARR(DP_GSP11_CNTL, DP, id), \ + SRI_ARR(DME_CONTROL, DME, id), \ + SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \ + SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \ + SRI_ARR(DIG_FE_CNTL, DIG, id), \ + SRI_ARR(DIG_FE_EN_CNTL, DIG, id), \ + SRI_ARR(DIG_FE_CLK_CNTL, DIG, id), \ + SRI_ARR(DIG_CLOCK_PATTERN, DIG, id), \ + SRI_ARR(DIG_FIFO_CTRL0, DIG, id), \ + SRI_ARR(STREAM_MAPPER_CONTROL, DIG, id) + +/* Link encoder */ +#define LE_DCN401_REG_LIST_RI(id) \ + LE_DCN3_REG_LIST_RI(id), \ + SRI_ARR(DP_DPHY_INTERNAL_CTRL, DP, id), \ + SRI_ARR(DIG_BE_CLK_CNTL, DIG, id) + +/* DPP */ +#define DPP_REG_LIST_DCN401_COMMON_RI(id) \ + SRI_ARR(CM_DEALPHA, CM, id), SRI_ARR(CM_MEM_PWR_STATUS, CM, id), \ + SRI_ARR(CM_BIAS_CR_R, CM, id), SRI_ARR(CM_BIAS_Y_G_CB_B, CM, id), \ + SRI_ARR(PRE_DEGAM, CNVC_CFG, id), SRI_ARR(CM_GAMCOR_CONTROL, CM, id), \ + SRI_ARR(CM_GAMCOR_LUT_CONTROL, CM, id), \ + SRI_ARR(CM_GAMCOR_LUT_INDEX, CM, id), \ + SRI_ARR(CM_GAMCOR_LUT_INDEX, CM, id), \ + SRI_ARR(CM_GAMCOR_LUT_DATA, CM, id), \ + 
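/* GAMCOR (gamma correction) RAM B and RAM A region registers */ \ +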
SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_REGION_0_1, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_REGION_32_33, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_OFFSET_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_OFFSET_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_OFFSET_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_REGION_0_1, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_REGION_32_33, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_OFFSET_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_OFFSET_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_OFFSET_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_R, CM, id), \ + SRI_ARR(DSCL_EXT_OVERSCAN_LEFT_RIGHT, DSCL, id), \ + SRI_ARR(DSCL_EXT_OVERSCAN_TOP_BOTTOM, DSCL, id), \ + SRI_ARR(OTG_H_BLANK, DSCL, id), SRI_ARR(OTG_V_BLANK, DSCL, id), \ + SRI_ARR(SCL_MODE, DSCL, id), SRI_ARR(LB_DATA_FORMAT, DSCL, id), \ + SRI_ARR(LB_MEMORY_CTRL, DSCL, id), SRI_ARR(DSCL_AUTOCAL, DSCL, id), \ + SRI_ARR(SCL_TAP_CONTROL, DSCL, id), \ + SRI_ARR(SCL_COEF_RAM_TAP_SELECT, DSCL, id), \ + SRI_ARR(SCL_COEF_RAM_TAP_DATA, DSCL, id), \ + SRI_ARR(DSCL_2TAP_CONTROL, DSCL, id), SRI_ARR(MPC_SIZE, DSCL, id), \ + SRI_ARR(SCL_HORZ_FILTER_SCALE_RATIO, DSCL, id), \ + SRI_ARR(SCL_VERT_FILTER_SCALE_RATIO, DSCL, id), \ + SRI_ARR(SCL_HORZ_FILTER_SCALE_RATIO_C, DSCL, id), \ + SRI_ARR(SCL_VERT_FILTER_SCALE_RATIO_C, DSCL, id), \ + SRI_ARR(SCL_HORZ_FILTER_INIT, DSCL, id), \ + SRI_ARR(SCL_HORZ_FILTER_INIT_C, DSCL, id), \ + SRI_ARR(SCL_VERT_FILTER_INIT, DSCL, id), \ + SRI_ARR(SCL_VERT_FILTER_INIT_C, DSCL, id), \ + SRI_ARR(RECOUT_START, DSCL, id), SRI_ARR(RECOUT_SIZE, DSCL, id), \ + SRI_ARR(PRE_DEALPHA, CNVC_CFG, id), SRI_ARR(PRE_REALPHA, CNVC_CFG, id), \ + SRI_ARR(PRE_CSC_MODE, CNVC_CFG, id), \ + SRI_ARR(PRE_CSC_C11_C12, CNVC_CFG, id), \ + SRI_ARR(PRE_CSC_C33_C34, CNVC_CFG, id), \ + SRI_ARR(PRE_CSC_B_C11_C12, CNVC_CFG, id), \ + SRI_ARR(PRE_CSC_B_C33_C34, CNVC_CFG, id), \ + SRI_ARR(CM_POST_CSC_CONTROL, CM, id), \ + SRI_ARR(CM_POST_CSC_C11_C12, CM, id), \ + SRI_ARR(CM_POST_CSC_C33_C34, CM, id), \ + SRI_ARR(CM_POST_CSC_B_C11_C12, CM, id), \ + SRI_ARR(CM_POST_CSC_B_C33_C34, CM, id), \ + SRI_ARR(CM_MEM_PWR_CTRL, CM, id), 
SRI_ARR(CM_CONTROL, CM, id), \ + SRI_ARR(CM_TEST_DEBUG_INDEX, CM, id), \ + SRI_ARR(CM_TEST_DEBUG_DATA, CM, id), \ + SRI_ARR(FORMAT_CONTROL, CNVC_CFG, id), \ + SRI_ARR(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \ + SRI_ARR(CURSOR0_CONTROL, CM_CUR, id), \ + SRI_ARR(CURSOR0_COLOR0, CM_CUR, id), \ + SRI_ARR(CURSOR0_COLOR1, CM_CUR, id), \ + SRI_ARR(CURSOR0_FP_SCALE_BIAS_G_Y, CM_CUR, id), \ + SRI_ARR(CURSOR0_FP_SCALE_BIAS_RB_CRCB, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_MODE, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C11_C12_A, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C13_C14_A, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C21_C22_A, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C23_C24_A, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C31_C32_A, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C33_C34_A, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C11_C12_B, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C13_C14_B, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C21_C22_B, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C23_C24_B, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C31_C32_B, CM_CUR, id), \ + SRI_ARR(CUR0_MATRIX_C33_C34_B, CM_CUR, id), \ + SRI_ARR(DPP_CONTROL, DPP_TOP, id), SRI_ARR(CM_HDR_MULT_COEF, CM, id), \ + SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \ + SRI_ARR(ALPHA_2BIT_LUT, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_BIAS_R, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_BIAS_G, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_BIAS_B, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_SCALE_R, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_SCALE_G, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_SCALE_B, CNVC_CFG, id), \ + SRI_ARR(COLOR_KEYER_CONTROL, CNVC_CFG, id), \ + SRI_ARR(COLOR_KEYER_ALPHA, CNVC_CFG, id), \ + SRI_ARR(COLOR_KEYER_RED, CNVC_CFG, id), \ + SRI_ARR(COLOR_KEYER_GREEN, CNVC_CFG, id), \ + SRI_ARR(COLOR_KEYER_BLUE, CNVC_CFG, id), \ + SRI_ARR(OBUF_MEM_PWR_CTRL, DSCL, id), \ + SRI_ARR(DSCL_MEM_PWR_STATUS, DSCL, id), \ + SRI_ARR(DSCL_MEM_PWR_CTRL, DSCL, id), \ + SRI_ARR(DSCL_CONTROL, DSCL, id) + +/* OPP */ +#define OPP_REG_LIST_DCN401_RI(id) \ + OPP_REG_LIST_DCN10_RI(id), OPP_DPG_REG_LIST_RI(id), \ + SRI_ARR(FMT_422_CONTROL, FMT, id) + +/* DSC */ +#define DSC_REG_LIST_DCN401_RI(id) \ + SRI_ARR(DSC_TOP_CONTROL, DSC_TOP, id), \ + SRI_ARR(DSC_DEBUG_CONTROL, DSC_TOP, id), \ + SRI_ARR(DSCC_CONFIG0, DSCC, id), SRI_ARR(DSCC_CONFIG1, DSCC, id), \ + SRI_ARR(DSCC_STATUS, DSCC, id), \ + SRI_ARR(DSCC_INTERRUPT_CONTROL0, DSCC, id), \ + SRI_ARR(DSCC_INTERRUPT_CONTROL1, DSCC, id), \ + SRI_ARR(DSCC_INTERRUPT_STATUS0, DSCC, id), \ + SRI_ARR(DSCC_INTERRUPT_STATUS1, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG0, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG1, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG2, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG3, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG4, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG5, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG6, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG7, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG8, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG9, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG10, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG11, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG12, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG13, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG14, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG15, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG16, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG17, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG18, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG19, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG20, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG21, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG22, DSCC, id), \ + SRI_ARR(DSCC_MEM_POWER_CONTROL0, DSCC, id), \ + SRI_ARR(DSCC_MEM_POWER_CONTROL1, DSCC, id), \ + SRI_ARR(DSCC_R_Y_SQUARED_ERROR_LOWER, DSCC, id), 
\ + SRI_ARR(DSCC_R_Y_SQUARED_ERROR_UPPER, DSCC, id), \ + SRI_ARR(DSCC_G_CB_SQUARED_ERROR_LOWER, DSCC, id), \ + SRI_ARR(DSCC_G_CB_SQUARED_ERROR_UPPER, DSCC, id), \ + SRI_ARR(DSCC_B_CR_SQUARED_ERROR_LOWER, DSCC, id), \ + SRI_ARR(DSCC_B_CR_SQUARED_ERROR_UPPER, DSCC, id), \ + SRI_ARR(DSCC_MAX_ABS_ERROR0, DSCC, id), \ + SRI_ARR(DSCC_MAX_ABS_ERROR1, DSCC, id), \ + SRI_ARR(DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL0, DSCC, id), \ + SRI_ARR(DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL1, DSCC, id), \ + SRI_ARR(DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL2, DSCC, id), \ + SRI_ARR(DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL3, DSCC, id), \ + SRI_ARR(DSCC_TEST_DEBUG_BUS_ROTATE, DSCC, id), \ + SRI_ARR(DSCCIF_CONFIG0, DSCCIF, id), \ + SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) + +/* MPC */ +#define MPC_DWB_MUX_REG_LIST_DCN4_01_RI(inst) \ + MPC_DWB_MUX_REG_LIST_DCN3_0_RI(inst) + +#define MPC_OUT_MUX_COMMON_REG_LIST_DCN4_01_RI(inst) \ + MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0_RI(inst) + +#define MPC_OUT_MUX_REG_LIST_DCN4_01_RI(inst) \ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(inst) + +/* OPTC */ +#define OPTC_COMMON_REG_LIST_DCN401_RI(inst) \ + SRI_ARR(OTG_VSTARTUP_PARAM, OTG, inst), \ + SRI_ARR(OTG_VUPDATE_PARAM, OTG, inst), \ + SRI_ARR(OTG_VREADY_PARAM, OTG, inst), \ + SRI_ARR(OTG_MASTER_UPDATE_LOCK, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL0, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL4, OTG, inst), \ + SRI_ARR(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst), \ + SRI_ARR(OTG_H_TOTAL, OTG, inst), \ + SRI_ARR(OTG_H_BLANK_START_END, OTG, inst), \ + SRI_ARR(OTG_H_SYNC_A, OTG, inst), SRI_ARR(OTG_H_SYNC_A_CNTL, OTG, inst), \ + SRI_ARR(OTG_H_TIMING_CNTL, OTG, inst), SRI_ARR(OTG_V_TOTAL, OTG, inst), \ + SRI_ARR(OTG_V_BLANK_START_END, OTG, inst), \ + SRI_ARR(OTG_V_SYNC_A, OTG, inst), SRI_ARR(OTG_V_SYNC_A_CNTL, OTG, inst), \ + SRI_ARR(OTG_CONTROL, OTG, inst), SRI_ARR(OTG_STEREO_CONTROL, OTG, inst), \ + SRI_ARR(OTG_3D_STRUCTURE_CONTROL, OTG, inst), \ + SRI_ARR(OTG_STEREO_STATUS, OTG, inst), \ + SRI_ARR(OTG_V_TOTAL_MAX, OTG, inst), \ + SRI_ARR(OTG_V_TOTAL_MIN, OTG, inst), \ + SRI_ARR(OTG_V_TOTAL_CONTROL, OTG, inst), \ + SRI_ARR(OTG_TRIGA_CNTL, OTG, inst), \ + SRI_ARR(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst), \ + SRI_ARR(OTG_STATIC_SCREEN_CONTROL, OTG, inst), \ + SRI_ARR(OTG_STATUS_FRAME_COUNT, OTG, inst), \ + SRI_ARR(OTG_STATUS, OTG, inst), SRI_ARR(OTG_STATUS_POSITION, OTG, inst), \ + SRI_ARR(OTG_NOM_VERT_POSITION, OTG, inst), \ + SRI_ARR(OTG_M_CONST_DTO0, OTG, inst), \ + SRI_ARR(OTG_M_CONST_DTO1, OTG, inst), \ + SRI_ARR(OTG_CLOCK_CONTROL, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst), \ + SRI_ARR(OPTC_INPUT_CLOCK_CONTROL, ODM, inst), \ + SRI_ARR(OPTC_DATA_SOURCE_SELECT, ODM, inst), \ + SRI_ARR(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst), \ + SRI_ARR(CONTROL, VTG, inst), SRI_ARR(OTG_VERT_SYNC_CONTROL, OTG, inst), \ + SRI_ARR(OTG_GSL_CONTROL, OTG, inst), SRI_ARR(OTG_CRC_CNTL, OTG, inst), \ + SRI_ARR(OTG_CRC0_DATA_RG, OTG, inst), \ + SRI_ARR(OTG_CRC0_DATA_B, OTG, inst), \ + SRI_ARR(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst), \ + SRI_ARR(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst), \ + SRI_ARR(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst), \ + SRI_ARR(OTG_CRC0_WINDOWB_Y_CONTROL, 
OTG, inst), \ + SR_ARR(GSL_SOURCE_SELECT, inst), \ + SRI_ARR(OTG_TRIGA_MANUAL_TRIG, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst), \ + SRI_ARR(OTG_GSL_WINDOW_X, OTG, inst), \ + SRI_ARR(OTG_GSL_WINDOW_Y, OTG, inst), \ + SRI_ARR(OTG_VUPDATE_KEEPOUT, OTG, inst), \ + SRI_ARR(OTG_DRR_TRIGGER_WINDOW, OTG, inst), \ + SRI_ARR(OTG_DRR_V_TOTAL_CHANGE, OTG, inst), \ + SRI_ARR(OPTC_DATA_FORMAT_CONTROL, ODM, inst), \ + SRI_ARR(OPTC_BYTES_PER_PIXEL, ODM, inst), \ + SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \ + SRI_ARR(OPTC_WIDTH_CONTROL2, ODM, inst), \ + SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst), \ + SRI_ARR(OTG_DRR_CONTROL, OTG, inst) + +/* HUBBUB */ +#define HUBBUB_REG_LIST_DCN4_01_RI(id) \ + SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A), \ + SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B), \ + SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL), \ + SR(DCHUBBUB_ARB_DRAM_STATE_CNTL), \ + SR(DCHUBBUB_ARB_SAT_LEVEL), \ + SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND), \ + SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_TEST_DEBUG_INDEX), \ + SR(DCHUBBUB_TEST_DEBUG_DATA), \ + SR(DCHUBBUB_SOFT_RESET), \ + SR(DCHUBBUB_CRC_CTRL), \ + SR(DCN_VM_FB_LOCATION_BASE), \ + SR(DCN_VM_FB_LOCATION_TOP), \ + SR(DCN_VM_FB_OFFSET), \ + SR(DCN_VM_AGP_BOT), \ + SR(DCN_VM_AGP_TOP), \ + SR(DCN_VM_AGP_BASE), \ + SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A), \ + SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A), \ + SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B), \ + SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B), \ + SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A), \ + SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A), \ + SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B), \ + SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B), \ + SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A), \ + SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A), \ + SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B), \ + SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B), \ + SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A), \ + SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A), \ + SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B), \ + SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B), \ + SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A), \ + SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B), \ + SR(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A), \ + SR(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B), \ + SR(DCHUBBUB_DET0_CTRL), \ + SR(DCHUBBUB_DET1_CTRL), \ + SR(DCHUBBUB_DET2_CTRL), \ + SR(DCHUBBUB_DET3_CTRL), \ + SR(DCHUBBUB_COMPBUF_CTRL), \ + SR(COMPBUF_RESERVED_SPACE), \ + SR(DCHUBBUB_DEBUG_CTRL_0), \ + SR(DCHUBBUB_ARB_USR_RETRAINING_CNTL), \ + SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A), \ + SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B), \ + SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A), \ + SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B), \ + SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A), \ + SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B), \ + SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A), \ + SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B), \ + SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A), \ + SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B), \ + SR(DCN_VM_FAULT_ADDR_MSB), SR(DCN_VM_FAULT_ADDR_LSB), \ + SR(DCN_VM_FAULT_CNTL), \ + SR(DCN_VM_FAULT_STATUS), \ + SR(SDPIF_REQUEST_RATE_LIMIT), \ + SR(DCHUBBUB_CLOCK_CNTL), \ + SR(DCHUBBUB_SDPIF_CFG0), \ + SR(DCHUBBUB_SDPIF_CFG1), \ + 
SR(DCHUBBUB_MEM_PWR_MODE_CTRL) + +/* DCCG */ + +#define DCCG_REG_LIST_DCN401_RI() \ + SR(DPPCLK_DTO_CTRL), DCCG_SRII(DTO_PARAM, DPPCLK, 0), \ + DCCG_SRII(DTO_PARAM, DPPCLK, 1), DCCG_SRII(DTO_PARAM, DPPCLK, 2), \ + DCCG_SRII(DTO_PARAM, DPPCLK, 3), DCCG_SRII(CLOCK_CNTL, HDMICHARCLK, 0), \ + SR(PHYASYMCLK_CLOCK_CNTL), SR(PHYBSYMCLK_CLOCK_CNTL), \ + SR(PHYCSYMCLK_CLOCK_CNTL), SR(PHYDSYMCLK_CLOCK_CNTL), \ + SR(DPSTREAMCLK_CNTL), SR(HDMISTREAMCLK_CNTL), \ + SR(SYMCLK32_SE_CNTL), SR(SYMCLK32_LE_CNTL), \ + DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0), DCCG_SRII(PIXEL_RATE_CNTL, OTG, 1), \ + DCCG_SRII(PIXEL_RATE_CNTL, OTG, 2), DCCG_SRII(PIXEL_RATE_CNTL, OTG, 3), \ + SR(OTG_PIXEL_RATE_DIV), SR(DTBCLK_P_CNTL), \ + SR(DCCG_AUDIO_DTO_SOURCE), SR(DENTIST_DISPCLK_CNTL), \ + SR(DPPCLK_CTRL), \ + DCCG_SRII(MODULO, DP_DTO, 0), DCCG_SRII(MODULO, DP_DTO, 1), \ + DCCG_SRII(MODULO, DP_DTO, 2), DCCG_SRII(MODULO, DP_DTO, 3), \ + DCCG_SRII(PHASE, DP_DTO, 0), DCCG_SRII(PHASE, DP_DTO, 1), \ + DCCG_SRII(PHASE, DP_DTO, 2), DCCG_SRII(PHASE, DP_DTO, 3), \ + SR(DSCCLK0_DTO_PARAM),\ + SR(DSCCLK1_DTO_PARAM),\ + SR(DSCCLK2_DTO_PARAM),\ + SR(DSCCLK3_DTO_PARAM),\ + SR(DSCCLK_DTO_CTRL),\ + SR(DCCG_GATE_DISABLE_CNTL),\ + SR(DCCG_GATE_DISABLE_CNTL2),\ + SR(DCCG_GATE_DISABLE_CNTL3),\ + SR(DCCG_GATE_DISABLE_CNTL4),\ + SR(DCCG_GATE_DISABLE_CNTL5),\ + SR(DCCG_GATE_DISABLE_CNTL6) + +#endif /* _DCN401_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/Makefile b/drivers/gpu/drm/amd/display/dc/spl/Makefile new file mode 100644 index 000000000000..89cad60b1a10 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/Makefile @@ -0,0 +1,33 @@ +# +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# +# Makefile for the 'spl' sub-component of DAL. +# It provides the scaling library interface. + +SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_isharp_filters.o + +AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/spl/,$(SPL)) + +AMD_DISPLAY_FILES += $(AMD_DAL_SPL) + + + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c new file mode 100644 index 000000000000..542cd6cdef46 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -0,0 +1,1354 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
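+
+/*
+ * Scaler Parameter Library (SPL) core. spl_calculate_scaler_params() derives
+ * the recout, scaling ratios, viewport size/offset and filter taps/inits from
+ * spl_in, then fills spl_out->dscl_prog_data (including the optional EASF and
+ * iSHARP programming) for register programming.
+ */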
+ +#include "dc_spl.h" +#include "dc_spl_scl_filters.h" +#include "dc_spl_isharp_filters.h" + +#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) +#define MIN_VIEWPORT_SIZE 12 + +static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1) +{ + struct spl_rect rec; + int r0_x_end = r0->x + r0->width; + int r1_x_end = r1->x + r1->width; + int r0_y_end = r0->y + r0->height; + int r1_y_end = r1->y + r1->height; + + rec.x = r0->x > r1->x ? r0->x : r1->x; + rec.width = r0_x_end > r1_x_end ? r1_x_end - rec.x : r0_x_end - rec.x; + rec.y = r0->y > r1->y ? r0->y : r1->y; + rec.height = r0_y_end > r1_y_end ? r1_y_end - rec.y : r0_y_end - rec.y; + + /* in case that there is no intersection */ + if (rec.width < 0 || rec.height < 0) + memset(&rec, 0, sizeof(rec)); + + return rec; +} + +static struct spl_rect shift_rec(const struct spl_rect *rec_in, int x, int y) +{ + struct spl_rect rec_out = *rec_in; + + rec_out.x += x; + rec_out.y += y; + + return rec_out; +} + +static struct spl_rect calculate_plane_rec_in_timing_active( + struct spl_in *spl_in, + const struct spl_rect *rec_in) +{ + /* + * The following diagram shows an example where we map a 1920x1200 + * desktop to a 2560x1440 timing with a plane rect in the middle + * of the screen. To map a plane rect from Stream Source to Timing + * Active space, we first multiply stream scaling ratios (i.e 2304/1920 + * horizontal and 1440/1200 vertical) to the plane's x and y, then + * we add stream destination offsets (i.e 128 horizontal, 0 vertical). + * This will give us a plane rect's position in Timing Active. However + * we have to remove the fractional. The rule is that we find left/right + * and top/bottom positions and round the value to the adjacent integer. + * + * Stream Source Space + * ------------ + * __________________________________________________ + * |Stream Source (1920 x 1200) ^ | + * | y | + * | <------- w --------|> | + * | __________________V | + * |<-- x -->|Plane//////////////| ^ | + * | |(pre scale)////////| | | + * | |///////////////////| | | + * | |///////////////////| h | + * | |///////////////////| | | + * | |///////////////////| | | + * | |///////////////////| V | + * | | + * | | + * |__________________________________________________| + * + * + * Timing Active Space + * --------------------------------- + * + * Timing Active (2560 x 1440) + * __________________________________________________ + * |*****| Stteam Destination (2304 x 1440) |*****| + * |*****| |*****| + * |<128>| |*****| + * |*****| __________________ |*****| + * |*****| |Plane/////////////| |*****| + * |*****| |(post scale)//////| |*****| + * |*****| |//////////////////| |*****| + * |*****| |//////////////////| |*****| + * |*****| |//////////////////| |*****| + * |*****| |//////////////////| |*****| + * |*****| |*****| + * |*****| |*****| + * |*****| |*****| + * |*****|______________________________________|*****| + * + * So the resulting formulas are shown below: + * + * recout_x = 128 + round(plane_x * 2304 / 1920) + * recout_w = 128 + round((plane_x + plane_w) * 2304 / 1920) - recout_x + * recout_y = 0 + round(plane_y * 1440 / 1280) + * recout_h = 0 + round((plane_y + plane_h) * 1440 / 1200) - recout_y + * + * NOTE: fixed point division is not error free. To reduce errors + * introduced by fixed point division, we divide only after + * multiplication is complete. 
+ */ + const struct spl_rect *stream_src = &spl_in->basic_out.src_rect; + const struct spl_rect *stream_dst = &spl_in->basic_out.dst_rect; + struct spl_rect rec_out = {0}; + struct fixed31_32 temp; + + + temp = dc_fixpt_from_fraction(rec_in->x * stream_dst->width, + stream_src->width); + rec_out.x = stream_dst->x + dc_fixpt_round(temp); + + temp = dc_fixpt_from_fraction( + (rec_in->x + rec_in->width) * stream_dst->width, + stream_src->width); + rec_out.width = stream_dst->x + dc_fixpt_round(temp) - rec_out.x; + + temp = dc_fixpt_from_fraction(rec_in->y * stream_dst->height, + stream_src->height); + rec_out.y = stream_dst->y + dc_fixpt_round(temp); + + temp = dc_fixpt_from_fraction( + (rec_in->y + rec_in->height) * stream_dst->height, + stream_src->height); + rec_out.height = stream_dst->y + dc_fixpt_round(temp) - rec_out.y; + + return rec_out; +} + +static struct spl_rect calculate_mpc_slice_in_timing_active( + struct spl_in *spl_in, + struct spl_rect *plane_clip_rec) +{ + int mpc_slice_count = spl_in->basic_in.mpc_combine_h; + int mpc_slice_idx = spl_in->basic_in.mpc_combine_v; + int epimo = mpc_slice_count - plane_clip_rec->width % mpc_slice_count - 1; + struct spl_rect mpc_rec; + + mpc_rec.width = plane_clip_rec->width / mpc_slice_count; + mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; + mpc_rec.height = plane_clip_rec->height; + mpc_rec.y = plane_clip_rec->y; + ASSERT(mpc_slice_count == 1 || + spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE || + mpc_rec.width % 2 == 0); + + /* extra pixels in the division remainder need to go to pipes after + * the extra pixel index minus one(epimo) defined here as: + */ + if (mpc_slice_idx > epimo) { + mpc_rec.x += mpc_slice_idx - epimo - 1; + mpc_rec.width += 1; + } + + if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) { + ASSERT(mpc_rec.height % 2 == 0); + mpc_rec.height /= 2; + } + return mpc_rec; +} + +static struct spl_rect calculate_odm_slice_in_timing_active(struct spl_in *spl_in) +{ + int odm_slice_count = spl_in->basic_out.odm_combine_factor; + int odm_slice_idx = spl_in->odm_slice_index; + bool is_last_odm_slice = (odm_slice_idx + 1) == odm_slice_count; + int h_active = spl_in->basic_out.output_size.width; + int v_active = spl_in->basic_out.output_size.height; + int odm_slice_width = h_active / odm_slice_count; + struct spl_rect odm_rec; + + odm_rec.x = odm_slice_width * odm_slice_idx; + odm_rec.width = is_last_odm_slice ? + /* last slice width is the reminder of h_active */ + h_active - odm_slice_width * (odm_slice_count - 1) : + /* odm slice width is the floor of h_active / count */ + odm_slice_width; + odm_rec.y = 0; + odm_rec.height = v_active; + + return odm_rec; +} + +static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out) +{ + /* + * A plane clip represents the desired plane size and position in Stream + * Source Space. Stream Source is the destination where all planes are + * blended (i.e. positioned, scaled and overlaid). It is a canvas where + * all planes associated with the current stream are drawn together. + * After Stream Source is completed, we will further scale and + * reposition the entire canvas of the stream source to Stream + * Destination in Timing Active Space. This could be due to display + * overscan adjustment where we will need to rescale and reposition all + * the planes so they can fit into a TV with overscan or downscale + * upscale features such as GPU scaling or VSR. + * + * This two step blending is a virtual procedure in software. 
In + * hardware there is no such thing as Stream Source. all planes are + * blended once in Timing Active Space. Software virtualizes a Stream + * Source space to decouple the math complicity so scaling param + * calculation focuses on one step at a time. + * + * In the following two diagrams, user applied 10% overscan adjustment + * so the Stream Source needs to be scaled down a little before mapping + * to Timing Active Space. As a result the Plane Clip is also scaled + * down by the same ratio, Plane Clip position (i.e. x and y) with + * respect to Stream Source is also scaled down. To map it in Timing + * Active Space additional x and y offsets from Stream Destination are + * added to Plane Clip as well. + * + * Stream Source Space + * ------------ + * __________________________________________________ + * |Stream Source (3840 x 2160) ^ | + * | y | + * | | | + * | __________________V | + * |<-- x -->|Plane Clip/////////| | + * | |(pre scale)////////| | + * | |///////////////////| | + * | |///////////////////| | + * | |///////////////////| | + * | |///////////////////| | + * | |///////////////////| | + * | | + * | | + * |__________________________________________________| + * + * + * Timing Active Space (3840 x 2160) + * --------------------------------- + * + * Timing Active + * __________________________________________________ + * | y_____________________________________________ | + * |x |Stream Destination (3456 x 1944) | | + * | | | | + * | | __________________ | | + * | | |Plane Clip////////| | | + * | | |(post scale)//////| | | + * | | |//////////////////| | | + * | | |//////////////////| | | + * | | |//////////////////| | | + * | | |//////////////////| | | + * | | | | + * | | | | + * | |____________________________________________| | + * |__________________________________________________| + * + * + * In Timing Active Space a plane clip could be further sliced into + * pieces called MPC slices. Each Pipe Context is responsible for + * processing only one MPC slice so the plane processing workload can be + * distributed to multiple DPP Pipes. MPC slices could be blended + * together to a single ODM slice. Each ODM slice is responsible for + * processing a portion of Timing Active divided horizontally so the + * output pixel processing workload can be distributed to multiple OPP + * pipes. All ODM slices are mapped together in ODM block so all MPC + * slices belong to different ODM slices could be pieced together to + * form a single image in Timing Active. MPC slices must belong to + * single ODM slice. If an MPC slice goes across ODM slice boundary, it + * needs to be divided into two MPC slices one for each ODM slice. + * + * In the following diagram the output pixel processing workload is + * divided horizontally into two ODM slices one for each OPP blend tree. + * OPP0 blend tree is responsible for processing left half of Timing + * Active, while OPP2 blend tree is responsible for processing right + * half. + * + * The plane has two MPC slices. However since the right MPC slice goes + * across ODM boundary, two DPP pipes are needed one for each OPP blend + * tree. (i.e. DPP1 for OPP0 blend tree and DPP2 for OPP2 blend tree). + * + * Assuming that we have a Pipe Context associated with OPP0 and DPP1 + * working on processing the plane in the diagram. We want to know the + * width and height of the shaded rectangle and its relative position + * with respect to the ODM slice0. This is called the recout of the pipe + * context. 
+ * + * Planes can be at arbitrary size and position and there could be an + * arbitrary number of MPC and ODM slices. The algorithm needs to take + * all scenarios into account. + * + * Timing Active Space (3840 x 2160) + * --------------------------------- + * + * Timing Active + * __________________________________________________ + * |OPP0(ODM slice0)^ |OPP2(ODM slice1) | + * | y | | + * | | <- w -> | + * | _____V________|____ | + * | |DPP0 ^ |DPP1 |DPP2| | + * |<------ x |-----|->|/////| | | + * | | | |/////| | | + * | | h |/////| | | + * | | | |/////| | | + * | |_____V__|/////|____| | + * | | | + * | | | + * | | | + * |_________________________|________________________| + * + * + */ + struct spl_rect plane_clip; + struct spl_rect mpc_slice_of_plane_clip; + struct spl_rect odm_slice; + struct spl_rect overlapping_area; + + plane_clip = calculate_plane_rec_in_timing_active(spl_in, + &spl_in->basic_in.clip_rect); + /* guard plane clip from drawing beyond stream dst here */ + plane_clip = intersect_rec(&plane_clip, + &spl_in->basic_out.dst_rect); + mpc_slice_of_plane_clip = calculate_mpc_slice_in_timing_active( + spl_in, &plane_clip); + odm_slice = calculate_odm_slice_in_timing_active(spl_in); + overlapping_area = intersect_rec(&mpc_slice_of_plane_clip, &odm_slice); + + if (overlapping_area.height > 0 && + overlapping_area.width > 0) + /* shift the overlapping area so it is with respect to current + * ODM slice's position + */ + spl_out->scl_data.recout = shift_rec( + &overlapping_area, + -odm_slice.x, -odm_slice.y); + else + /* if there is no overlap, zero recout */ + memset(&spl_out->scl_data.recout, 0, + sizeof(struct spl_rect)); +} +/* Calculate scaling ratios */ +static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out *spl_out) +{ + const int in_w = spl_in->basic_out.src_rect.width; + const int in_h = spl_in->basic_out.src_rect.height; + const int out_w = spl_in->basic_out.dst_rect.width; + const int out_h = spl_in->basic_out.dst_rect.height; + struct spl_rect surf_src = spl_in->basic_in.src_rect; + + /*Swap surf_src height and width since scaling ratios are in recout rotation*/ + if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || + spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) + swap(surf_src.height, surf_src.width); + + spl_out->scl_data.ratios.horz = dc_fixpt_from_fraction( + surf_src.width, + spl_in->basic_in.dst_rect.width); + spl_out->scl_data.ratios.vert = dc_fixpt_from_fraction( + surf_src.height, + spl_in->basic_in.dst_rect.height); + + if (spl_in->basic_out.view_format == SPL_VIEW_3D_SIDE_BY_SIDE) + spl_out->scl_data.ratios.horz.value *= 2; + else if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) + spl_out->scl_data.ratios.vert.value *= 2; + + spl_out->scl_data.ratios.vert.value = div64_s64( + spl_out->scl_data.ratios.vert.value * in_h, out_h); + spl_out->scl_data.ratios.horz.value = div64_s64( + spl_out->scl_data.ratios.horz.value * in_w, out_w); + + spl_out->scl_data.ratios.horz_c = spl_out->scl_data.ratios.horz; + spl_out->scl_data.ratios.vert_c = spl_out->scl_data.ratios.vert; + + if (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 + || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) { + spl_out->scl_data.ratios.horz_c.value /= 2; + spl_out->scl_data.ratios.vert_c.value /= 2; + } + spl_out->scl_data.ratios.horz = dc_fixpt_truncate( + spl_out->scl_data.ratios.horz, 19); + spl_out->scl_data.ratios.vert = dc_fixpt_truncate( + spl_out->scl_data.ratios.vert, 19); + spl_out->scl_data.ratios.horz_c = 
dc_fixpt_truncate( + spl_out->scl_data.ratios.horz_c, 19); + spl_out->scl_data.ratios.vert_c = dc_fixpt_truncate( + spl_out->scl_data.ratios.vert_c, 19); +} +/* Calculate Viewport size */ +static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_out *spl_out) +{ + spl_out->scl_data.viewport.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz, + spl_out->scl_data.recout.width)); + spl_out->scl_data.viewport.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert, + spl_out->scl_data.recout.height)); + spl_out->scl_data.viewport_c.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz_c, + spl_out->scl_data.recout.width)); + spl_out->scl_data.viewport_c.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert_c, + spl_out->scl_data.recout.height)); + if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || + spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) { + swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); + swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); + } +} +static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation, + bool horizontal_mirror, + bool *orthogonal_rotation, + bool *flip_vert_scan_dir, + bool *flip_horz_scan_dir) +{ + *orthogonal_rotation = false; + *flip_vert_scan_dir = false; + *flip_horz_scan_dir = false; + if (rotation == SPL_ROTATION_ANGLE_180) { + *flip_vert_scan_dir = true; + *flip_horz_scan_dir = true; + } else if (rotation == SPL_ROTATION_ANGLE_90) { + *orthogonal_rotation = true; + *flip_horz_scan_dir = true; + } else if (rotation == SPL_ROTATION_ANGLE_270) { + *orthogonal_rotation = true; + *flip_vert_scan_dir = true; + } + + if (horizontal_mirror) + *flip_horz_scan_dir = !*flip_horz_scan_dir; +} +/* + * We completely calculate vp offset, size and inits here based entirely on scaling + * ratios and recout for pixel perfect pipe combine. + */ +static void spl_calculate_init_and_vp(bool flip_scan_dir, + int recout_offset_within_recout_full, + int recout_size, + int src_size, + int taps, + struct fixed31_32 ratio, + struct fixed31_32 init_adj, + struct fixed31_32 *init, + int *vp_offset, + int *vp_size) +{ + struct fixed31_32 temp; + int int_part; + + /* + * First of the taps starts sampling pixel number <init_int_part> corresponding to recout + * pixel 1. Next recout pixel samples int part of <init + scaling ratio> and so on. + * All following calculations are based on this logic. + * + * Init calculated according to formula: + * init = (scaling_ratio + number_of_taps + 1) / 2 + * init_bot = init + scaling_ratio + * to get pixel perfect combine add the fraction from calculating vp offset + */ + temp = dc_fixpt_mul_int(ratio, recout_offset_within_recout_full); + *vp_offset = dc_fixpt_floor(temp); + temp.value &= 0xffffffff; + *init = dc_fixpt_add(dc_fixpt_div_int(dc_fixpt_add_int(ratio, taps + 1), 2), temp); + *init = dc_fixpt_add(*init, init_adj); + *init = dc_fixpt_truncate(*init, 19); + + /* + * If viewport has non 0 offset and there are more taps than covered by init then + * we should decrease the offset and increase init so we are never sampling + * outside of viewport. 
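+ *
+ * Rough illustration (example values only): with ratio = 1.5, taps = 4, a
+ * zero recout offset and no chroma init adjustment,
+ * init = (1.5 + 4 + 1) / 2 = 3.25. Since floor(init) = 3 < taps, the code
+ * below moves up to taps - 3 = 1 pixel from vp_offset into init, giving
+ * init = 4.25 when vp_offset allows it.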
+ */ + int_part = dc_fixpt_floor(*init); + if (int_part < taps) { + int_part = taps - int_part; + if (int_part > *vp_offset) + int_part = *vp_offset; + *vp_offset -= int_part; + *init = dc_fixpt_add_int(*init, int_part); + } + /* + * If taps are sampling outside of viewport at end of recout and there are more pixels + * available in the surface we should increase the viewport size, regardless set vp to + * only what is used. + */ + temp = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_size - 1)); + *vp_size = dc_fixpt_floor(temp); + if (*vp_size + *vp_offset > src_size) + *vp_size = src_size - *vp_offset; + + /* We did all the math assuming we are scanning same direction as display does, + * however mirror/rotation changes how vp scans vs how it is offset. If scan direction + * is flipped we simply need to calculate offset from the other side of plane. + * Note that outside of viewport all scaling hardware works in recout space. + */ + if (flip_scan_dir) + *vp_offset = src_size - *vp_offset - *vp_size; +} + +static bool spl_is_yuv420(enum spl_pixel_format format) +{ + switch (format) { + case SPL_PIXEL_FORMAT_420BPP8: + case SPL_PIXEL_FORMAT_420BPP10: + return true; + default: + return false; + } +} + +/*Calculate inits and viewport */ +static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_out *spl_out) +{ + struct spl_rect src = spl_in->basic_in.src_rect; + struct spl_rect recout_dst_in_active_timing; + struct spl_rect recout_clip_in_active_timing; + struct spl_rect recout_clip_in_recout_dst; + struct spl_rect overlap_in_active_timing; + struct spl_rect odm_slice = calculate_odm_slice_in_timing_active(spl_in); + int vpc_div = (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 + || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) ? 
2 : 1; + bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir; + struct fixed31_32 init_adj_h = dc_fixpt_zero; + struct fixed31_32 init_adj_v = dc_fixpt_zero; + + recout_clip_in_active_timing = shift_rec( + &spl_out->scl_data.recout, odm_slice.x, odm_slice.y); + recout_dst_in_active_timing = calculate_plane_rec_in_timing_active( + spl_in, &spl_in->basic_in.dst_rect); + overlap_in_active_timing = intersect_rec(&recout_clip_in_active_timing, + &recout_dst_in_active_timing); + if (overlap_in_active_timing.width > 0 && + overlap_in_active_timing.height > 0) + recout_clip_in_recout_dst = shift_rec(&overlap_in_active_timing, + -recout_dst_in_active_timing.x, + -recout_dst_in_active_timing.y); + else + memset(&recout_clip_in_recout_dst, 0, sizeof(struct spl_rect)); + /* + * Work in recout rotation since that requires less transformations + */ + spl_get_vp_scan_direction( + spl_in->basic_in.rotation, + spl_in->basic_in.horizontal_mirror, + &orthogonal_rotation, + &flip_vert_scan_dir, + &flip_horz_scan_dir); + + if (orthogonal_rotation) { + swap(src.width, src.height); + swap(flip_vert_scan_dir, flip_horz_scan_dir); + } + + if (spl_is_yuv420(spl_in->basic_in.format)) { + /* this gives the direction of the cositing (negative will move + * left, right otherwise) + */ + int sign = 1; + + switch (spl_in->basic_in.cositing) { + + case CHROMA_COSITING_LEFT: + init_adj_h = dc_fixpt_zero; + init_adj_v = dc_fixpt_from_fraction(sign, 2); + break; + case CHROMA_COSITING_NONE: + init_adj_h = dc_fixpt_from_fraction(sign, 2); + init_adj_v = dc_fixpt_from_fraction(sign, 2); + break; + case CHROMA_COSITING_TOPLEFT: + default: + init_adj_h = dc_fixpt_zero; + init_adj_v = dc_fixpt_zero; + break; + } + } + + spl_calculate_init_and_vp( + flip_horz_scan_dir, + recout_clip_in_recout_dst.x, + spl_out->scl_data.recout.width, + src.width, + spl_out->scl_data.taps.h_taps, + spl_out->scl_data.ratios.horz, + dc_fixpt_zero, + &spl_out->scl_data.inits.h, + &spl_out->scl_data.viewport.x, + &spl_out->scl_data.viewport.width); + spl_calculate_init_and_vp( + flip_horz_scan_dir, + recout_clip_in_recout_dst.x, + spl_out->scl_data.recout.width, + src.width / vpc_div, + spl_out->scl_data.taps.h_taps_c, + spl_out->scl_data.ratios.horz_c, + init_adj_h, + &spl_out->scl_data.inits.h_c, + &spl_out->scl_data.viewport_c.x, + &spl_out->scl_data.viewport_c.width); + spl_calculate_init_and_vp( + flip_vert_scan_dir, + recout_clip_in_recout_dst.y, + spl_out->scl_data.recout.height, + src.height, + spl_out->scl_data.taps.v_taps, + spl_out->scl_data.ratios.vert, + dc_fixpt_zero, + &spl_out->scl_data.inits.v, + &spl_out->scl_data.viewport.y, + &spl_out->scl_data.viewport.height); + spl_calculate_init_and_vp( + flip_vert_scan_dir, + recout_clip_in_recout_dst.y, + spl_out->scl_data.recout.height, + src.height / vpc_div, + spl_out->scl_data.taps.v_taps_c, + spl_out->scl_data.ratios.vert_c, + init_adj_v, + &spl_out->scl_data.inits.v_c, + &spl_out->scl_data.viewport_c.y, + &spl_out->scl_data.viewport_c.height); + if (orthogonal_rotation) { + swap(spl_out->scl_data.viewport.x, spl_out->scl_data.viewport.y); + swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); + swap(spl_out->scl_data.viewport_c.x, spl_out->scl_data.viewport_c.y); + swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); + } + spl_out->scl_data.viewport.x += src.x; + spl_out->scl_data.viewport.y += src.y; + ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); + spl_out->scl_data.viewport_c.x += src.x / vpc_div; + 
spl_out->scl_data.viewport_c.y += src.y / vpc_div; +} +static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) +{ + /* + * Handle side by side and top bottom 3d recout offsets after vp calculation + * since 3d is special and needs to calculate vp as if there is no recout offset + * This may break with rotation, good thing we aren't mixing hw rotation and 3d + */ + if (spl_in->basic_in.mpc_combine_v) { + ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || + (spl_in->basic_out.view_format != SPL_VIEW_3D_TOP_AND_BOTTOM && + spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE)); + if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) + recout->y += recout->height; + else if (spl_in->basic_out.view_format == SPL_VIEW_3D_SIDE_BY_SIDE) + recout->x += recout->width; + } +} + +static void spl_clamp_viewport(struct spl_rect *viewport) +{ + /* Clamp minimum viewport size */ + if (viewport->height < MIN_VIEWPORT_SIZE) + viewport->height = MIN_VIEWPORT_SIZE; + if (viewport->width < MIN_VIEWPORT_SIZE) + viewport->width = MIN_VIEWPORT_SIZE; +} +static bool spl_dscl_is_420_format(enum spl_pixel_format format) +{ + if (format == SPL_PIXEL_FORMAT_420BPP8 || + format == SPL_PIXEL_FORMAT_420BPP10) + return true; + else + return false; +} +static bool spl_dscl_is_video_format(enum spl_pixel_format format) +{ + if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN + && format <= SPL_PIXEL_FORMAT_VIDEO_END) + return true; + else + return false; +} +static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, + const struct spl_scaler_data *data) +{ + const long long one = dc_fixpt_one.value; + enum spl_pixel_format pixel_format = spl_in->basic_in.format; + + if (data->ratios.horz.value == one + && data->ratios.vert.value == one + && data->ratios.horz_c.value == one + && data->ratios.vert_c.value == one + && !spl_in->basic_out.always_scale) + return SCL_MODE_SCALING_444_BYPASS; + + if (!spl_dscl_is_420_format(pixel_format)) { + if (spl_dscl_is_video_format(pixel_format)) + return SCL_MODE_SCALING_444_YCBCR_ENABLE; + else + return SCL_MODE_SCALING_444_RGB_ENABLE; + } + if (data->ratios.horz.value == one && data->ratios.vert.value == one) + return SCL_MODE_SCALING_420_LUMA_BYPASS; + if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) + return SCL_MODE_SCALING_420_CHROMA_BYPASS; + + return SCL_MODE_SCALING_420_YCBCR_ENABLE; +} +/* Calculate optimal number of taps */ +static bool spl_get_optimal_number_of_taps( + int max_downscale_src_width, struct spl_in *spl_in, struct spl_out *spl_out, + const struct spl_taps *in_taps) +{ + int num_part_y, num_part_c; + int max_taps_y, max_taps_c; + int min_taps_y, min_taps_c; + enum lb_memory_config lb_config; + + if (spl_out->scl_data.viewport.width > spl_out->scl_data.h_active && + max_downscale_src_width != 0 && + spl_out->scl_data.viewport.width > max_downscale_src_width) + return false; + /* + * Set default taps if none are provided + * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling + * taps = 4 for upscaling + */ + if (in_taps->h_taps == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz) > 1) + spl_out->scl_data.taps.h_taps = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz), 8); + else + spl_out->scl_data.taps.h_taps = 4; + } else + spl_out->scl_data.taps.h_taps = in_taps->h_taps; + if (in_taps->v_taps == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 1) + spl_out->scl_data.taps.v_taps = min(dc_fixpt_ceil(dc_fixpt_mul_int( + spl_out->scl_data.ratios.vert, 2)), 
8); + else + spl_out->scl_data.taps.v_taps = 4; + } else + spl_out->scl_data.taps.v_taps = in_taps->v_taps; + if (in_taps->v_taps_c == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 1) + spl_out->scl_data.taps.v_taps_c = min(dc_fixpt_ceil(dc_fixpt_mul_int( + spl_out->scl_data.ratios.vert_c, 2)), 8); + else + spl_out->scl_data.taps.v_taps_c = 4; + } else + spl_out->scl_data.taps.v_taps_c = in_taps->v_taps_c; + if (in_taps->h_taps_c == 0) { + if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c) > 1) + spl_out->scl_data.taps.h_taps_c = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c), 8); + else + spl_out->scl_data.taps.h_taps_c = 4; + } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) + /* Only 1 and even h_taps_c are supported by hw */ + spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; + else + spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c; + + /*Ensure we can support the requested number of vtaps*/ + min_taps_y = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); + min_taps_c = dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c); + + /* Use LB_MEMORY_CONFIG_3 for 4:2:0 */ + if ((spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8) + || (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10)) + lb_config = LB_MEMORY_CONFIG_3; + else + lb_config = LB_MEMORY_CONFIG_0; + // Determine max vtap support by calculating how much line buffer can fit + spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_out->scl_data, + lb_config, &num_part_y, &num_part_c); + /* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */ + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 2) + max_taps_y = num_part_y - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) - 2); + else + max_taps_y = num_part_y; + + if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 2) + max_taps_c = num_part_c - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) - 2); + else + max_taps_c = num_part_c; + + if (max_taps_y < min_taps_y) + return false; + else if (max_taps_c < min_taps_c) + return false; + + if (spl_out->scl_data.taps.v_taps > max_taps_y) + spl_out->scl_data.taps.v_taps = max_taps_y; + + if (spl_out->scl_data.taps.v_taps_c > max_taps_c) + spl_out->scl_data.taps.v_taps_c = max_taps_c; + if (spl_in->prefer_easf) { + // EASF can be enabled only for taps 3,4,6 + // If optimal no of taps is 5, then set it to 4 + // If optimal no of taps is 7 or 8, then set it to 6 + if (spl_out->scl_data.taps.v_taps == 5) + spl_out->scl_data.taps.v_taps = 4; + if (spl_out->scl_data.taps.v_taps == 7 || spl_out->scl_data.taps.v_taps == 8) + spl_out->scl_data.taps.v_taps = 6; + + if (spl_out->scl_data.taps.v_taps_c == 5) + spl_out->scl_data.taps.v_taps_c = 4; + if (spl_out->scl_data.taps.v_taps_c == 7 || spl_out->scl_data.taps.v_taps_c == 8) + spl_out->scl_data.taps.v_taps_c = 6; + + if (spl_out->scl_data.taps.h_taps == 5) + spl_out->scl_data.taps.h_taps = 4; + if (spl_out->scl_data.taps.h_taps == 7 || spl_out->scl_data.taps.h_taps == 8) + spl_out->scl_data.taps.h_taps = 6; + + if (spl_out->scl_data.taps.h_taps_c == 5) + spl_out->scl_data.taps.h_taps_c = 4; + if (spl_out->scl_data.taps.h_taps_c == 7 || spl_out->scl_data.taps.h_taps_c == 8) + spl_out->scl_data.taps.h_taps_c = 6; + + } // end of if prefer_easf + if (!spl_in->basic_out.always_scale) { + if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz)) + spl_out->scl_data.taps.h_taps = 1; + if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert)) + spl_out->scl_data.taps.v_taps = 1; + if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c)) + 
spl_out->scl_data.taps.h_taps_c = 1; + if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c)) + spl_out->scl_data.taps.v_taps_c = 1; + } + return true; +} +static void spl_set_black_color_data(enum spl_pixel_format format, + struct scl_black_color *scl_black_color) +{ + bool ycbcr = format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN + && format <= SPL_PIXEL_FORMAT_VIDEO_END; + if (ycbcr) { + scl_black_color->offset_rgb_y = BLACK_OFFSET_RGB_Y; + scl_black_color->offset_rgb_cbcr = BLACK_OFFSET_CBCR; + } else { + scl_black_color->offset_rgb_y = 0x0; + scl_black_color->offset_rgb_cbcr = 0x0; + } +} + +static void spl_set_manual_ratio_init_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *scl_data) +{ + struct fixed31_32 bot; + + dscl_prog_data->ratios.h_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.horz) << 5; + dscl_prog_data->ratios.v_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.vert) << 5; + dscl_prog_data->ratios.h_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.horz_c) << 5; + dscl_prog_data->ratios.v_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.vert_c) << 5; + /* + * 0.24 format for fraction, first five bits zeroed + */ + dscl_prog_data->init.h_filter_init_frac = + dc_fixpt_u0d19(scl_data->inits.h) << 5; + dscl_prog_data->init.h_filter_init_int = + dc_fixpt_floor(scl_data->inits.h); + dscl_prog_data->init.h_filter_init_frac_c = + dc_fixpt_u0d19(scl_data->inits.h_c) << 5; + dscl_prog_data->init.h_filter_init_int_c = + dc_fixpt_floor(scl_data->inits.h_c); + dscl_prog_data->init.v_filter_init_frac = + dc_fixpt_u0d19(scl_data->inits.v) << 5; + dscl_prog_data->init.v_filter_init_int = + dc_fixpt_floor(scl_data->inits.v); + dscl_prog_data->init.v_filter_init_frac_c = + dc_fixpt_u0d19(scl_data->inits.v_c) << 5; + dscl_prog_data->init.v_filter_init_int_c = + dc_fixpt_floor(scl_data->inits.v_c); + + bot = dc_fixpt_add(scl_data->inits.v, scl_data->ratios.vert); + dscl_prog_data->init.v_filter_init_bot_frac = dc_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int = dc_fixpt_floor(bot); + bot = dc_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c); + dscl_prog_data->init.v_filter_init_bot_frac_c = dc_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int_c = dc_fixpt_floor(bot); +} + +static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *scl_data) +{ + dscl_prog_data->taps.v_taps = scl_data->taps.v_taps - 1; + dscl_prog_data->taps.h_taps = scl_data->taps.h_taps - 1; + dscl_prog_data->taps.v_taps_c = scl_data->taps.v_taps_c - 1; + dscl_prog_data->taps.h_taps_c = scl_data->taps.h_taps_c - 1; +} +static const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) +{ + if (taps == 8) + return spl_get_filter_8tap_64p(ratio); + else if (taps == 7) + return spl_get_filter_7tap_64p(ratio); + else if (taps == 6) + return spl_get_filter_6tap_64p(ratio); + else if (taps == 5) + return spl_get_filter_5tap_64p(ratio); + else if (taps == 4) + return spl_get_filter_4tap_64p(ratio); + else if (taps == 3) + return spl_get_filter_3tap_64p(ratio); + else if (taps == 2) + return spl_get_filter_2tap_64p(); + else if (taps == 1) + return NULL; + else { + /* should never happen, bug */ + return NULL; + } +} +static void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data) +{ + dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps, data->ratios.horz); + dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps, 
data->ratios.vert); + dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps_c, data->ratios.horz_c); + dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps_c, data->ratios.vert_c); +} +/* Populate dscl prog data structure from scaler data calculated by SPL */ +static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out) +{ + struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; + + const struct spl_scaler_data *data = &spl_out->scl_data; + + struct scl_black_color *scl_black_color = &dscl_prog_data->scl_black_color; + + // Set values for recout + dscl_prog_data->recout = spl_out->scl_data.recout; + // Set values for MPC Size + dscl_prog_data->mpc_size.width = spl_out->scl_data.h_active; + dscl_prog_data->mpc_size.height = spl_out->scl_data.v_active; + + // SCL_MODE - Set SCL_MODE data + dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data); + + // SCL_BLACK_COLOR + spl_set_black_color_data(spl_in->basic_in.format, scl_black_color); + + /* Manually calculate scale ratio and init values */ + spl_set_manual_ratio_init_data(dscl_prog_data, data); + + // Set HTaps/VTaps + spl_set_taps_data(dscl_prog_data, data); + // Set viewport + dscl_prog_data->viewport = spl_out->scl_data.viewport; + // Set viewport_c + dscl_prog_data->viewport_c = spl_out->scl_data.viewport_c; + // Set filters data + spl_set_filters_data(dscl_prog_data, data); +} +/* Enable EASF ?*/ +static bool enable_easf(int scale_ratio, int taps, + enum linear_light_scaling lls_pref, bool prefer_easf) +{ + // Is downscaling > 6:1 ? + if (scale_ratio > 6) { + // END - No EASF support for downscaling > 6:1 + return false; + } + // Is upscaling or downscaling up to 2:1? + if (scale_ratio <= 2) { + // Is linear scaling or EASF preferred? 
+ if (lls_pref == LLS_PREF_YES || prefer_easf) { + // LB support taps 3, 4, 6 + if (taps == 3 || taps == 4 || taps == 6) { + // END - EASF supported + return true; + } + } + } + // END - EASF not supported + return false; +} +/* Set EASF data */ +static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, + bool enable_easf_v, bool enable_easf_h, enum linear_light_scaling lls_pref) +{ + dscl_prog_data->easf_matrix_mode = 0; + if (enable_easf_v) { + dscl_prog_data->easf_v_en = true; + dscl_prog_data->easf_v_ring = 1; + dscl_prog_data->easf_v_sharp_factor = 1; + dscl_prog_data->easf_v_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable + dscl_prog_data->easf_v_bf2_mode = 0xF; // 4-bit, BF2 calculation mode + dscl_prog_data->easf_v_bf3_mode = 2; // 2-bit, BF3 chroma mode correction calculation mode + dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control + dscl_prog_data->easf_v_ringest_3tap_dntilt_uptilt = + 0x9F00;// FP1.5.10 [minCoef] (-0.036109167214271) + dscl_prog_data->easf_v_ringest_3tap_uptilt_max = + 0x24FE; // FP1.5.10 [upTiltMaxVal] ( 0.904556445553545) + dscl_prog_data->easf_v_ringest_3tap_dntilt_slope = + 0x3940; // FP1.5.10 [dnTiltSlope] ( 0.910488988173371) + dscl_prog_data->easf_v_ringest_3tap_uptilt1_slope = + 0x359C; // FP1.5.10 [upTilt1Slope] ( 0.125620179040899) + dscl_prog_data->easf_v_ringest_3tap_uptilt2_slope = + 0x359C; // FP1.5.10 [upTilt2Slope] ( 0.006786817723568) + dscl_prog_data->easf_v_ringest_3tap_uptilt2_offset = + 0x9F00; // FP1.5.10 [upTilt2Offset] (-0.006139059716651) + dscl_prog_data->easf_v_ringest_eventap_reduceg1 = + 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] + dscl_prog_data->easf_v_ringest_eventap_reduceg2 = + 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] + dscl_prog_data->easf_v_ringest_eventap_gain1 = + 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 + dscl_prog_data->easf_v_ringest_eventap_gain2 = + 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 + dscl_prog_data->easf_v_bf_maxa = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 0 + dscl_prog_data->easf_v_bf_maxb = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 1 + dscl_prog_data->easf_v_bf_mina = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 0 + dscl_prog_data->easf_v_bf_minb = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + 
dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + if (lls_pref == LLS_PREF_YES) { + dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 + dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 + dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf3_pwl_in_set1 = + 0x0B37; // FP0.6.6, BF3 Input value PWL Segment 1 (0.0078125 * 125^3) + dscl_prog_data->easf_v_bf3_pwl_base_set1 = 62; // S0.6, BF3 Base PWL Segment 1 + dscl_prog_data->easf_v_bf3_pwl_slope_set1 = + 0x13B8; // FP1.6.6, BF3 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf3_pwl_in_set2 = + 0x0BB7; // FP0.6.6, BF3 Input value PWL Segment 2 (0.03125 * 125^3) + dscl_prog_data->easf_v_bf3_pwl_base_set2 = 20; // S0.6, BF3 Base PWL Segment 2 + dscl_prog_data->easf_v_bf3_pwl_slope_set2 = + 0x1356; // FP1.6.6, BF3 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf3_pwl_in_set3 = + 0x0BF7; // FP0.6.6, BF3 Input value PWL Segment 3 (0.0625 * 125^3) + dscl_prog_data->easf_v_bf3_pwl_base_set3 = 0; // S0.6, BF3 Base PWL Segment 3 + dscl_prog_data->easf_v_bf3_pwl_slope_set3 = + 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 + dscl_prog_data->easf_v_bf3_pwl_in_set4 = + 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) + dscl_prog_data->easf_v_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_slope_set4 = + 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_in_set5 = + 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) + dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + } else { + dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 + dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 + dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf3_pwl_in_set1 = + 0x06C0; // FP0.6.6, BF3 Input value PWL Segment 1 (0.0625) + dscl_prog_data->easf_v_bf3_pwl_base_set1 = 63; // S0.6, BF3 Base PWL Segment 1 + dscl_prog_data->easf_v_bf3_pwl_slope_set1 = 0x1896; // FP1.6.6, BF3 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf3_pwl_in_set2 = + 0x0700; // FP0.6.6, BF3 Input value PWL Segment 2 (0.125) + dscl_prog_data->easf_v_bf3_pwl_base_set2 = 20; // S0.6, BF3 Base PWL Segment 2 + dscl_prog_data->easf_v_bf3_pwl_slope_set2 = 0x1810; // FP1.6.6, BF3 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf3_pwl_in_set3 = + 0x0740; // FP0.6.6, BF3 Input value PWL Segment 3 (0.25) + 
dscl_prog_data->easf_v_bf3_pwl_base_set3 = 0; // S0.6, BF3 Base PWL Segment 3 + dscl_prog_data->easf_v_bf3_pwl_slope_set3 = + 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 + dscl_prog_data->easf_v_bf3_pwl_in_set4 = + 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) + dscl_prog_data->easf_v_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_in_set5 = + 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) + dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + } + } + if (enable_easf_h) { + dscl_prog_data->easf_h_en = true; + dscl_prog_data->easf_h_ring = 1; + dscl_prog_data->easf_h_sharp_factor = 1; + dscl_prog_data->easf_h_bf1_en = + 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable + dscl_prog_data->easf_h_bf2_mode = + 0xF; // 4-bit, BF2 calculation mode + dscl_prog_data->easf_h_bf3_mode = + 2; // 2-bit, BF3 chroma mode correction calculation mode + dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control + dscl_prog_data->easf_h_ringest_eventap_reduceg1 = + 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] + dscl_prog_data->easf_h_ringest_eventap_reduceg2 = + 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] + dscl_prog_data->easf_h_ringest_eventap_gain1 = + 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 + dscl_prog_data->easf_h_ringest_eventap_gain2 = + 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 + dscl_prog_data->easf_h_bf_maxa = 63; //Horz Max BF value A in U0.6 format.Selected if H_FCNTL==0 + dscl_prog_data->easf_h_bf_maxb = 63; //Horz Max BF value B in U0.6 format.Selected if H_FCNTL==1 + dscl_prog_data->easf_h_bf_mina = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==0 + dscl_prog_data->easf_h_bf_minb = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==1 + dscl_prog_data->easf_h_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + 
dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + if (lls_pref == LLS_PREF_YES) { + dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 + dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 + dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf3_pwl_in_set1 = + 0x0B37; // FP0.6.6, BF3 Input value PWL Segment 1 (0.0078125 * 125^3) + dscl_prog_data->easf_h_bf3_pwl_base_set1 = 62; // S0.6, BF3 Base PWL Segment 1 + dscl_prog_data->easf_h_bf3_pwl_slope_set1 = 0x13B8; // FP1.6.6, BF3 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf3_pwl_in_set2 = + 0x0BB7; // FP0.6.6, BF3 Input value PWL Segment 2 (0.03125 * 125^3) + dscl_prog_data->easf_h_bf3_pwl_base_set2 = 20; // S0.6, BF3 Base PWL Segment 2 + dscl_prog_data->easf_h_bf3_pwl_slope_set2 = 0x1356; // FP1.6.6, BF3 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf3_pwl_in_set3 = + 0x0BF7; // FP0.6.6, BF3 Input value PWL Segment 3 (0.0625 * 125^3) + dscl_prog_data->easf_h_bf3_pwl_base_set3 = 0; // S0.6, BF3 Base PWL Segment 3 + dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 + dscl_prog_data->easf_h_bf3_pwl_in_set4 = + 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) + dscl_prog_data->easf_h_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_in_set5 = + 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) + dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + } else { + dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 + dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 + dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf3_pwl_in_set1 = + 0x06C0; // FP0.6.6, BF3 Input value PWL Segment 1 (0.0625) + dscl_prog_data->easf_h_bf3_pwl_base_set1 = 63; // S0.6, BF3 Base PWL Segment 1 + dscl_prog_data->easf_h_bf3_pwl_slope_set1 = 0x1896; // FP1.6.6, BF3 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf3_pwl_in_set2 = + 0x0700; // FP0.6.6, BF3 Input value PWL Segment 2 (0.125) + dscl_prog_data->easf_h_bf3_pwl_base_set2 = 20; // S0.6, BF3 Base PWL Segment 2 + dscl_prog_data->easf_h_bf3_pwl_slope_set2 = 0x1810; // FP1.6.6, BF3 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf3_pwl_in_set3 = + 0x0740; // FP0.6.6, BF3 Input value PWL Segment 3 (0.25) + dscl_prog_data->easf_h_bf3_pwl_base_set3 = 0; // S0.6, BF3 Base PWL Segment 3 + dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 + dscl_prog_data->easf_h_bf3_pwl_in_set4 = + 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) + dscl_prog_data->easf_h_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_in_set5 = + 0x0780; // FP0.6.6, BF3 Input value 
PWL Segment 5 (0.5) + dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + } // if (lls_pref == LLS_PREF_YES) + } + if (lls_pref == LLS_PREF_YES) { + dscl_prog_data->easf_ltonl_en = 1; // Linear input + dscl_prog_data->easf_matrix_c0 = + 0x504E; // fp1.5.10, C0 coefficient (LN_BT2020: 0.2627 * (2^14)/125 = 34.43750000) + dscl_prog_data->easf_matrix_c1 = + 0x558E; // fp1.5.10, C1 coefficient (LN_BT2020: 0.6780 * (2^14)/125 = 88.87500000) + dscl_prog_data->easf_matrix_c2 = + 0x47C6; // fp1.5.10, C2 coefficient (LN_BT2020: 0.0593 * (2^14)/125 = 7.77343750) + dscl_prog_data->easf_matrix_c3 = + 0x0; // fp1.5.10, C3 coefficient + } else { + dscl_prog_data->easf_ltonl_en = 0; // Non-Linear input + dscl_prog_data->easf_matrix_c0 = + 0x3434; // fp1.5.10, C0 coefficient (LN_BT2020: 0.262695312500000) + dscl_prog_data->easf_matrix_c1 = + 0x396D; // fp1.5.10, C1 coefficient (LN_BT2020: 0.678222656250000) + dscl_prog_data->easf_matrix_c2 = + 0x2B97; // fp1.5.10, C2 coefficient (LN_BT2020: 0.059295654296875) + dscl_prog_data->easf_matrix_c3 = + 0x0; // fp1.5.10, C3 coefficient + } +} +/*Set isharp noise detection */ +static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data) +{ + // ISHARP_NOISEDET_MODE + // 0: 3x5 as VxH + // 1: 4x5 as VxH + // 2: + // 3: 5x5 as VxH + if (dscl_prog_data->taps.v_taps == 6) + dscl_prog_data->isharp_noise_det.mode = 3; // ISHARP_NOISEDET_MODE + else if (dscl_prog_data->taps.h_taps == 4) + dscl_prog_data->isharp_noise_det.mode = 1; // ISHARP_NOISEDET_MODE + else if (dscl_prog_data->taps.h_taps == 3) + dscl_prog_data->isharp_noise_det.mode = 0; // ISHARP_NOISEDET_MODE +}; +/* Set EASF data */ +static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, + struct adaptive_sharpness adp_sharpness) +{ + dscl_prog_data->isharp_en = 1; // ISHARP_EN + dscl_prog_data->isharp_noise_det.enable = 1; // ISHARP_NOISEDET_EN + // Set ISHARP_NOISEDET_MODE if htaps = 6-tap + if (dscl_prog_data->taps.h_taps == 6) + spl_set_isharp_noise_det_mode(dscl_prog_data); // ISHARP_NOISEDET_MODE + // Program noise detection threshold + dscl_prog_data->isharp_noise_det.uthreshold = 24; // ISHARP_NOISEDET_UTHRE + dscl_prog_data->isharp_noise_det.dthreshold = 4; // ISHARP_NOISEDET_DTHRE + // Program noise detection gain + dscl_prog_data->isharp_noise_det.pwl_start_in = 3; // ISHARP_NOISEDET_PWL_START_IN + dscl_prog_data->isharp_noise_det.pwl_end_in = 13; // ISHARP_NOISEDET_PWL_END_IN + dscl_prog_data->isharp_noise_det.pwl_slope = 1623; // ISHARP_NOISEDET_PWL_SLOPE + + dscl_prog_data->isharp_fmt.mode = 1; // ISHARP_FMT_MODE + dscl_prog_data->isharp_fmt.norm = 0x3C00; // ISHARP_FMT_NORM + dscl_prog_data->isharp_lba.mode = 0; // ISHARP_LBA_MODE + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 256; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. 
SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 614; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = -20; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format + switch (adp_sharpness.sharpness) { + case SHARPNESS_LOW: + dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_0p5x(); + break; + case SHARPNESS_MID: + dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_1p0x(); + break; + case SHARPNESS_HIGH: + dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_2p0x(); + break; + default: + BREAK_TO_DEBUGGER(); + } + + // Program the nldelta soft clip values + dscl_prog_data->isharp_nldelta_sclip.enable_p = 1; // ISHARP_NLDELTA_SCLIP_EN_P + dscl_prog_data->isharp_nldelta_sclip.pivot_p = 70; // ISHARP_NLDELTA_SCLIP_PIVOT_P + dscl_prog_data->isharp_nldelta_sclip.slope_p = 24; // ISHARP_NLDELTA_SCLIP_SLOPE_P + dscl_prog_data->isharp_nldelta_sclip.enable_n = 1; // ISHARP_NLDELTA_SCLIP_EN_N + dscl_prog_data->isharp_nldelta_sclip.pivot_n = 70; // ISHARP_NLDELTA_SCLIP_PIVOT_N + dscl_prog_data->isharp_nldelta_sclip.slope_n = 24; // ISHARP_NLDELTA_SCLIP_SLOPE_N + + // Set the values as per lookup table +} +static bool spl_get_isharp_en(struct adaptive_sharpness adp_sharpness, + int vscale_ratio, int hscale_ratio, struct spl_taps taps) +{ + bool enable_isharp = false; + + if (adp_sharpness.enable == false) + return enable_isharp; // Return if adaptive sharpness is disabled + // Is downscaling ? 
+ if (vscale_ratio > 1 || hscale_ratio > 1) { + // END - No iSHARP support for downscaling + return enable_isharp; + } + // Scaling is up to 1:1 (no scaling) or upscaling + + // LB support horizontal taps 4,6 or vertical taps 3, 4, 6 + if (taps.h_taps == 4 || taps.h_taps == 6 || + taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6) { + // END - iSHARP supported + enable_isharp = true; + } + return enable_isharp; +} +/* Calculate scaler parameters */ +bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) +{ + bool res = false; + bool enable_easf_v = false; + bool enable_easf_h = false; + // All SPL calls + /* recout calculation */ + /* depends on h_active */ + spl_calculate_recout(spl_in, spl_out); + /* depends on pixel format */ + spl_calculate_scaling_ratios(spl_in, spl_out); + /* depends on scaling ratios and recout, does not calculate offset yet */ + spl_calculate_viewport_size(spl_in, spl_out); + + res = spl_get_optimal_number_of_taps( + spl_in->basic_out.max_downscale_src_width, spl_in, + spl_out, &spl_in->scaling_quality); + /* + * Depends on recout, scaling ratios, h_active and taps + * May need to re-check lb size after this in some obscure scenario + */ + if (res) + spl_calculate_inits_and_viewports(spl_in, spl_out); + // Handle 3d recout + spl_handle_3d_recout(spl_in, &spl_out->scl_data.recout); + // Clamp + spl_clamp_viewport(&spl_out->scl_data.viewport); + + if (!res) + return res; + // Save all calculated parameters in dscl_prog_data structure to program hw registers + spl_set_dscl_prog_data(spl_in, spl_out); + // Enable EASF on vertical? + int vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); + int hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + enable_easf_v = enable_easf(vratio, spl_out->scl_data.taps.v_taps, spl_in->lls_pref, spl_in->prefer_easf); + // Enable EASF on horizontal? + enable_easf_h = enable_easf(hratio, spl_out->scl_data.taps.h_taps, spl_in->lls_pref, spl_in->prefer_easf); + // Set EASF + spl_set_easf_data(spl_out->dscl_prog_data, enable_easf_v, enable_easf_h, spl_in->lls_pref); + // Set iSHARP + bool enable_isharp = spl_get_isharp_en(spl_in->adaptive_sharpness, vratio, hratio, + spl_out->scl_data.taps); + if (enable_isharp) + spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness); + return res; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h new file mode 100644 index 000000000000..f1fd3eb92f8a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DC_SPL_H__ +#define __DC_SPL_H__ + +#include "dc_spl_types.h" +#define BLACK_OFFSET_RGB_Y 0x0 +#define BLACK_OFFSET_CBCR 0x8000 + +#ifdef __cplusplus +extern "C" { +#endif + +/* SPL interfaces */ + +bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out); + +#ifdef __cplusplus +} +#endif + +#endif /* __DC_SPL_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c new file mode 100644 index 000000000000..26b48b3576a5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc.
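A note on the delta-gain 1D LUTs that this new file defines: each LUT is 32 DWORDs holding 128 byte-sized gain entries, per the "LUT content is packed as 4-bytes into one DWORD/entry" comments below, and spl_set_isharp_data() above picks the 0.5x/1.0x/2.0x variant from the requested sharpness level. The following standalone sketch shows one way such a LUT could be unpacked for inspection; it assumes little-endian byte order within each DWORD (the 0x02010000 -> 0, 0, 1, 2 ramp at the start of filter_isharp_1D_lut_0 suggests the low byte comes first), and the helper name is hypothetical, not part of this patch or of the driver.

#include <stdint.h>
#include <stddef.h>

/*
 * Illustrative only: expand a packed 32-DWORD delta-gain LUT into its
 * 128 individual gain bytes, low byte of each DWORD first.
 */
void unpack_isharp_1d_lut(const uint32_t packed[32], uint8_t out[128])
{
	size_t i;

	for (i = 0; i < 32; i++) {
		uint32_t dword = packed[i];

		out[4 * i + 0] = (uint8_t)(dword & 0xFF);
		out[4 * i + 1] = (uint8_t)((dword >> 8) & 0xFF);
		out[4 * i + 2] = (uint8_t)((dword >> 16) & 0xFF);
		out[4 * i + 3] = (uint8_t)((dword >> 24) & 0xFF);
	}
}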
+ +#include "dc_spl_types.h" +#include "dc_spl_isharp_filters.h" + +//======================================== +// Delta Gain 1DLUT +// LUT content is packed as 4-bytes into one DWORD/entry +// A_start = 0.000000 +// A_end = 10.000000 +// A_gain = 2.000000 +// B_start = 11.000000 +// B_end = 86.000000 +// C_start = 40.000000 +// C_end = 64.000000 +//======================================== +static const uint32_t filter_isharp_1D_lut_0[32] = { +0x02010000, +0x0A070503, +0x1614100D, +0x1C1B1918, +0x22211F1E, +0x27262423, +0x2A2A2928, +0x2D2D2C2B, +0x302F2F2E, +0x31313030, +0x31313131, +0x31313131, +0x30303031, +0x292D2F2F, +0x191D2125, +0x050A0F14, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +}; +//======================================== +// Delta Gain 1DLUT +// LUT content is packed as 4-bytes into one DWORD/entry +// A_start = 0.000000 +// A_end = 10.000000 +// A_gain = 0.500000 +// B_start = 11.000000 +// B_end = 127.000000 +// C_start = 96.000000 +// C_end = 127.000000 +//======================================== + +static const uint32_t filter_isharp_1D_lut_0p5x[32] = { +0x00000000, +0x02020101, +0x06050403, +0x07070606, +0x09080808, +0x0A0A0A09, +0x0C0B0B0B, +0x0D0D0C0C, +0x0E0E0D0D, +0x0F0F0E0E, +0x100F0F0F, +0x10101010, +0x11111010, +0x11111111, +0x11111111, +0x11111111, +0x11111111, +0x11111111, +0x11111111, +0x10101111, +0x10101010, +0x0F0F0F10, +0x0E0E0F0F, +0x0D0D0E0E, +0x0C0C0D0D, +0x0B0B0B0C, +0x090A0A0A, +0x08080809, +0x06060707, +0x04050506, +0x02030304, +0x00010102, +}; +//======================================== +// Delta Gain 1DLUT +// LUT content is packed as 4-bytes into one DWORD/entry +// A_start = 0.000000 +// A_end = 10.000000 +// A_gain = 1.000000 +// B_start = 11.000000 +// B_end = 127.000000 +// C_start = 96.000000 +// C_end = 127.000000 +//======================================== +static const uint32_t filter_isharp_1D_lut_1p0x[32] = { +0x01000000, +0x05040302, +0x0B0A0806, +0x0E0E0D0C, +0x1211100F, +0x15141312, +0x17171615, +0x1A191918, +0x1C1B1B1A, +0x1E1D1D1C, +0x1F1F1E1E, +0x2020201F, +0x21212121, +0x22222222, +0x23232222, +0x23232323, +0x23232323, +0x22222323, +0x22222222, +0x21212121, +0x1F202020, +0x1E1E1F1F, +0x1C1D1D1E, +0x1A1B1B1C, +0x1819191A, +0x15161717, +0x12131415, +0x0F101112, +0x0C0D0E0E, +0x08090A0B, +0x04050607, +0x00010203, +}; +//======================================== +// Delta Gain 1DLUT +// LUT content is packed as 4-bytes into one DWORD/entry +// A_start = 0.000000 +// A_end = 10.000000 +// A_gain = 1.500000 +// B_start = 11.000000 +// B_end = 127.000000 +// C_start = 96.000000 +// C_end = 127.000000 +//======================================== +static const uint32_t filter_isharp_1D_lut_1p5x[32] = { +0x01010000, +0x07050402, +0x110F0C0A, +0x16141312, +0x1B191817, +0x1F1E1D1C, +0x23222120, +0x26262524, +0x2A292827, +0x2C2C2B2A, +0x2F2E2E2D, +0x3130302F, +0x32323131, +0x33333332, +0x34343433, +0x34343434, +0x34343434, +0x33343434, +0x32333333, +0x31313232, +0x2F303031, +0x2D2E2E2F, +0x2A2B2C2C, +0x2728292A, +0x24252626, +0x20212223, +0x1C1D1E1F, +0x1718191B, +0x12131416, +0x0C0E0F10, +0x0608090B, +0x00020305 +}; +//======================================== +// Delta Gain 1DLUT +// LUT content is packed as 4-bytes into one DWORD/entry +// A_start = 0.000000 +// A_end = 10.000000 +// A_gain = 2.000000 +// B_start = 11.000000 +// B_end = 127.000000 +// C_start = 40.000000 +// C_end 
= 127.000000 +//======================================== +static const uint32_t filter_isharp_1D_lut_2p0x[32] = { +0x02010000, +0x0A070503, +0x1614100D, +0x1D1B1A18, +0x2322201F, +0x29282625, +0x2F2D2C2B, +0x33323130, +0x38373534, +0x3B3A3938, +0x3E3E3D3C, +0x4140403F, +0x43424241, +0x44444443, +0x45454545, +0x46454545, +0x45454546, +0x45454545, +0x43444444, +0x41424243, +0x3F404041, +0x3C3D3E3E, +0x38393A3B, +0x34353738, +0x30313233, +0x2B2C2D2F, +0x25262829, +0x1F202223, +0x181A1B1D, +0x10121416, +0x080B0D0E, +0x00020406, +}; +// Wide scaler coefficients +//======================================================== +// <using> gen_scaler_coeffs.m +// <date> 15-Dec-2021 +// <coeffDescrip> 6t_64p_LanczosEd_p_1_p_10qb_ +// <num_taps> 6 +// <num_phases> 64 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint32_t filter_isharp_wide_6tap_64p[198] = { +0x0000, 0x0000, 0x0400, 0x0000, 0x0000, 0x0000, +0x0003, 0x0FF3, 0x0400, 0x000D, 0x0FFD, 0x0000, +0x0006, 0x0FE7, 0x03FE, 0x001C, 0x0FF9, 0x0000, +0x0009, 0x0FDB, 0x03FC, 0x002B, 0x0FF5, 0x0000, +0x000C, 0x0FD0, 0x03F9, 0x003A, 0x0FF1, 0x0000, +0x000E, 0x0FC5, 0x03F5, 0x004A, 0x0FED, 0x0001, +0x0011, 0x0FBB, 0x03F0, 0x005A, 0x0FE9, 0x0001, +0x0013, 0x0FB2, 0x03EB, 0x006A, 0x0FE5, 0x0001, +0x0015, 0x0FA9, 0x03E4, 0x007B, 0x0FE1, 0x0002, +0x0017, 0x0FA1, 0x03DD, 0x008D, 0x0FDC, 0x0002, +0x0018, 0x0F99, 0x03D4, 0x00A0, 0x0FD8, 0x0003, +0x001A, 0x0F92, 0x03CB, 0x00B2, 0x0FD3, 0x0004, +0x001B, 0x0F8C, 0x03C1, 0x00C6, 0x0FCE, 0x0004, +0x001C, 0x0F86, 0x03B7, 0x00D9, 0x0FC9, 0x0005, +0x001D, 0x0F80, 0x03AB, 0x00EE, 0x0FC4, 0x0006, +0x001E, 0x0F7C, 0x039F, 0x0101, 0x0FBF, 0x0007, +0x001F, 0x0F78, 0x0392, 0x0115, 0x0FBA, 0x0008, +0x001F, 0x0F74, 0x0385, 0x012B, 0x0FB5, 0x0008, +0x0020, 0x0F71, 0x0376, 0x0140, 0x0FB0, 0x0009, +0x0020, 0x0F6E, 0x0367, 0x0155, 0x0FAB, 0x000B, +0x0020, 0x0F6C, 0x0357, 0x016B, 0x0FA6, 0x000C, +0x0020, 0x0F6A, 0x0347, 0x0180, 0x0FA2, 0x000D, +0x0020, 0x0F69, 0x0336, 0x0196, 0x0F9D, 0x000E, +0x0020, 0x0F69, 0x0325, 0x01AB, 0x0F98, 0x000F, +0x001F, 0x0F68, 0x0313, 0x01C3, 0x0F93, 0x0010, +0x001F, 0x0F69, 0x0300, 0x01D8, 0x0F8F, 0x0011, +0x001E, 0x0F69, 0x02ED, 0x01EF, 0x0F8B, 0x0012, +0x001D, 0x0F6A, 0x02D9, 0x0205, 0x0F87, 0x0014, +0x001D, 0x0F6C, 0x02C5, 0x021A, 0x0F83, 0x0015, +0x001C, 0x0F6E, 0x02B1, 0x0230, 0x0F7F, 0x0016, +0x001B, 0x0F70, 0x029C, 0x0247, 0x0F7B, 0x0017, +0x001A, 0x0F72, 0x0287, 0x025D, 0x0F78, 0x0018, +0x0019, 0x0F75, 0x0272, 0x0272, 0x0F75, 0x0019 +}; +// Blur and scale coefficients +//======================================================== +// <using> gen_BlurScale_coeffs.m +// <date> 25-Apr-2022 +// <num_taps> 4 +// <num_phases> 64 +// <CoefType> Blur & Scale LPF +// <CoefQuant> S1.10 +//======================================================== +static const uint32_t filter_isharp_bs_4tap_64p[198] = { +0x0000, 0x00E5, 0x0237, 0x00E4, 0x0000, 0x0000, +0x0000, 0x00DE, 0x0237, 0x00EB, 0x0000, 0x0000, +0x0000, 0x00D7, 0x0236, 0x00F2, 0x0001, 0x0000, +0x0000, 0x00D0, 0x0235, 0x00FA, 0x0001, 0x0000, +0x0000, 0x00C9, 0x0234, 0x0101, 0x0002, 0x0000, +0x0000, 0x00C2, 0x0233, 0x0108, 0x0003, 0x0000, +0x0000, 0x00BB, 0x0232, 0x0110, 0x0003, 0x0000, +0x0000, 0x00B5, 0x0230, 0x0117, 0x0004, 0x0000, +0x0000, 0x00AE, 0x022E, 0x011F, 0x0005, 0x0000, +0x0000, 0x00A8, 0x022C, 0x0126, 0x0006, 0x0000, +0x0000, 0x00A2, 0x022A, 0x012D, 0x0007, 0x0000, +0x0000, 0x009C, 0x0228, 0x0134, 0x0008, 0x0000, +0x0000, 0x0096, 0x0225, 0x013C, 0x0009, 0x0000, +0x0000, 
0x0090, 0x0222, 0x0143, 0x000B, 0x0000, +0x0000, 0x008A, 0x021F, 0x014B, 0x000C, 0x0000, +0x0000, 0x0085, 0x021C, 0x0151, 0x000E, 0x0000, +0x0000, 0x007F, 0x0218, 0x015A, 0x000F, 0x0000, +0x0000, 0x007A, 0x0215, 0x0160, 0x0011, 0x0000, +0x0000, 0x0074, 0x0211, 0x0168, 0x0013, 0x0000, +0x0000, 0x006F, 0x020D, 0x016F, 0x0015, 0x0000, +0x0000, 0x006A, 0x0209, 0x0176, 0x0017, 0x0000, +0x0000, 0x0065, 0x0204, 0x017E, 0x0019, 0x0000, +0x0000, 0x0060, 0x0200, 0x0185, 0x001B, 0x0000, +0x0000, 0x005C, 0x01FB, 0x018C, 0x001D, 0x0000, +0x0000, 0x0057, 0x01F6, 0x0193, 0x0020, 0x0000, +0x0000, 0x0053, 0x01F1, 0x019A, 0x0022, 0x0000, +0x0000, 0x004E, 0x01EC, 0x01A1, 0x0025, 0x0000, +0x0000, 0x004A, 0x01E6, 0x01A8, 0x0028, 0x0000, +0x0000, 0x0046, 0x01E1, 0x01AF, 0x002A, 0x0000, +0x0000, 0x0042, 0x01DB, 0x01B6, 0x002D, 0x0000, +0x0000, 0x003F, 0x01D5, 0x01BB, 0x0031, 0x0000, +0x0000, 0x003B, 0x01CF, 0x01C2, 0x0034, 0x0000, +0x0000, 0x0037, 0x01C9, 0x01C9, 0x0037, 0x0000 +}; +const uint32_t *spl_get_filter_isharp_1D_lut_0(void) +{ + return filter_isharp_1D_lut_0; +} +const uint32_t *spl_get_filter_isharp_1D_lut_0p5x(void) +{ + return filter_isharp_1D_lut_0p5x; +} +const uint32_t *spl_get_filter_isharp_1D_lut_1p0x(void) +{ + return filter_isharp_1D_lut_1p0x; +} +const uint32_t *spl_get_filter_isharp_1D_lut_1p5x(void) +{ + return filter_isharp_1D_lut_1p5x; +} +const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void) +{ + return filter_isharp_1D_lut_2p0x; +} +const uint32_t *spl_get_filter_isharp_wide_6tap_64p(void) +{ + return filter_isharp_wide_6tap_64p; +} +const uint32_t *spl_get_filter_isharp_bs_4tap_64p(void) +{ + return filter_isharp_bs_4tap_64p; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h new file mode 100644 index 000000000000..ff189d86e534 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DC_SPL_ISHARP_FILTERS_H__ +#define __DC_SPL_ISHARP_FILTERS_H__ + +#include "dc_spl_types.h" + +const uint32_t *spl_get_filter_isharp_1D_lut_0(void); +const uint32_t *spl_get_filter_isharp_1D_lut_0p5x(void); +const uint32_t *spl_get_filter_isharp_1D_lut_1p0x(void); +const uint32_t *spl_get_filter_isharp_1D_lut_1p5x(void); +const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void); +const uint32_t *spl_get_filter_isharp_bs_4tap_64p(void); +const uint32_t *spl_get_filter_isharp_wide_6tap_64p(void); +#endif /* __DC_SPL_ISHARP_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c new file mode 100644 index 000000000000..c174b2e8a150 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c @@ -0,0 +1,1425 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
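A note on the polyphase coefficient tables in dc_spl_scl_filters.c below: each table header lists <num_taps> and <num_phases>, but the array stores only (num_phases / 2 + 1) rows of coefficients in the s1.12 output format named in the header, i.e. 9 rows for the 16-phase tables and 33 rows for the 64-phase ones. The last stored row is the center phase and is already palindromic (e.g. 0x0200, 0x0C00, 0x0200), so the remaining phases presumably follow by mirroring the stored half, a property of the symmetric ModifiedLanczos kernels. The small sketch below only reproduces the array lengths seen in this patch; the helper is illustrative and not a driver function.

#include <stdio.h>

/* Stored rows cover phases 0 .. num_phases/2 inclusive. */
static unsigned int spl_table_entries(unsigned int num_taps, unsigned int num_phases)
{
	return (num_phases / 2 + 1) * num_taps;
}

int main(void)
{
	printf("%u\n", spl_table_entries(2, 16)); /* 18  - filter_2tap_16p     */
	printf("%u\n", spl_table_entries(3, 64)); /* 99  - filter_3tap_64p_*   */
	printf("%u\n", spl_table_entries(8, 64)); /* 264 - filter_8tap_64p_*   */
	return 0;
}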
+ +#include "dc_spl_types.h" +#include "dc_spl_scl_filters.h" +//========================================= +// <num_taps> = 2 +// <num_phases> = 16 +// <scale_ratio> = 0.833333 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = s1.10 +// <CoefOut> = s1.12 +//========================================= +static const uint16_t filter_2tap_16p[18] = { + 0x1000, 0x0000, + 0x0FF0, 0x0010, + 0x0FB0, 0x0050, + 0x0F34, 0x00CC, + 0x0E68, 0x0198, + 0x0D44, 0x02BC, + 0x0BC4, 0x043C, + 0x09FC, 0x0604, + 0x0800, 0x0800 +}; + +//========================================= +// <num_taps> = 3 +// <num_phases> = 16 +// <scale_ratio> = 0.83333 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_3tap_16p_upscale[27] = { + 0x0804, 0x07FC, 0x0000, + 0x06AC, 0x0978, 0x3FDC, + 0x055C, 0x0AF0, 0x3FB4, + 0x0420, 0x0C50, 0x3F90, + 0x0300, 0x0D88, 0x3F78, + 0x0200, 0x0E90, 0x3F70, + 0x0128, 0x0F5C, 0x3F7C, + 0x007C, 0x0FD8, 0x3FAC, + 0x0000, 0x1000, 0x0000 +}; + +//========================================= +// <num_taps> = 3 +// <num_phases> = 16 +// <scale_ratio> = 1.16666 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_3tap_16p_116[27] = { + 0x0804, 0x07FC, 0x0000, + 0x0700, 0x0914, 0x3FEC, + 0x0604, 0x0A1C, 0x3FE0, + 0x050C, 0x0B14, 0x3FE0, + 0x041C, 0x0BF4, 0x3FF0, + 0x0340, 0x0CB0, 0x0010, + 0x0274, 0x0D3C, 0x0050, + 0x01C0, 0x0D94, 0x00AC, + 0x0128, 0x0DB4, 0x0124 +}; + +//========================================= +// <num_taps> = 3 +// <num_phases> = 16 +// <scale_ratio> = 1.49999 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_3tap_16p_149[27] = { + 0x0804, 0x07FC, 0x0000, + 0x0730, 0x08CC, 0x0004, + 0x0660, 0x098C, 0x0014, + 0x0590, 0x0A3C, 0x0034, + 0x04C4, 0x0AD4, 0x0068, + 0x0400, 0x0B54, 0x00AC, + 0x0348, 0x0BB0, 0x0108, + 0x029C, 0x0BEC, 0x0178, + 0x0200, 0x0C00, 0x0200 +}; + +//========================================= +// <num_taps> = 3 +// <num_phases> = 16 +// <scale_ratio> = 1.83332 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_3tap_16p_183[27] = { + 0x0804, 0x07FC, 0x0000, + 0x0754, 0x0880, 0x002C, + 0x06A8, 0x08F0, 0x0068, + 0x05FC, 0x0954, 0x00B0, + 0x0550, 0x09AC, 0x0104, + 0x04A8, 0x09F0, 0x0168, + 0x0408, 0x0A20, 0x01D8, + 0x036C, 0x0A40, 0x0254, + 0x02DC, 0x0A48, 0x02DC +}; + +//========================================= +// <num_taps> = 4 +// <num_phases> = 16 +// <scale_ratio> = 0.83333 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_4tap_16p_upscale[36] = { + 0x0000, 0x1000, 0x0000, 0x0000, + 0x3F74, 0x0FDC, 0x00B4, 0x3FFC, + 0x3F0C, 0x0F70, 0x0194, 0x3FF0, + 0x3ECC, 0x0EC4, 0x0298, 0x3FD8, + 0x3EAC, 0x0DE4, 0x03B8, 0x3FB8, + 0x3EA4, 0x0CD8, 0x04F4, 0x3F90, + 0x3EB8, 0x0BA0, 0x0644, 0x3F64, + 0x3ED8, 0x0A54, 0x07A0, 0x3F34, + 0x3F00, 0x08FC, 0x0900, 0x3F04 +}; + +//========================================= +// <num_taps> = 4 +// <num_phases> = 16 +// <scale_ratio> = 1.16666 
(input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_4tap_16p_116[36] = { + 0x01A8, 0x0CB4, 0x01A4, 0x0000, + 0x0110, 0x0CB0, 0x0254, 0x3FEC, + 0x0090, 0x0C80, 0x031C, 0x3FD4, + 0x0024, 0x0C2C, 0x03F4, 0x3FBC, + 0x3FD8, 0x0BAC, 0x04DC, 0x3FA0, + 0x3F9C, 0x0B14, 0x05CC, 0x3F84, + 0x3F70, 0x0A60, 0x06C4, 0x3F6C, + 0x3F5C, 0x098C, 0x07BC, 0x3F5C, + 0x3F54, 0x08AC, 0x08AC, 0x3F54 +}; + +//========================================= +// <num_taps> = 4 +// <num_phases> = 16 +// <scale_ratio> = 1.49999 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_4tap_16p_149[36] = { + 0x02B8, 0x0A90, 0x02B8, 0x0000, + 0x0230, 0x0A90, 0x0350, 0x3FF0, + 0x01B8, 0x0A78, 0x03F0, 0x3FE0, + 0x0148, 0x0A48, 0x049C, 0x3FD4, + 0x00E8, 0x0A00, 0x054C, 0x3FCC, + 0x0098, 0x09A0, 0x0600, 0x3FC8, + 0x0054, 0x0928, 0x06B4, 0x3FD0, + 0x001C, 0x08A4, 0x0760, 0x3FE0, + 0x3FFC, 0x0804, 0x0804, 0x3FFC +}; + +//========================================= +// <num_taps> = 4 +// <num_phases> = 16 +// <scale_ratio> = 1.83332 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_4tap_16p_183[36] = { + 0x03B0, 0x08A0, 0x03B0, 0x0000, + 0x0348, 0x0898, 0x041C, 0x0004, + 0x02DC, 0x0884, 0x0490, 0x0010, + 0x0278, 0x0864, 0x0500, 0x0024, + 0x021C, 0x0838, 0x0570, 0x003C, + 0x01C8, 0x07FC, 0x05E0, 0x005C, + 0x0178, 0x07B8, 0x064C, 0x0084, + 0x0130, 0x076C, 0x06B0, 0x00B4, + 0x00F0, 0x0714, 0x0710, 0x00EC +}; + +//========================================= +// <num_taps> = 2 +// <num_phases> = 64 +// <scale_ratio> = 0.833333 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = s1.10 +// <CoefOut> = s1.12 +//========================================= +static const uint16_t filter_2tap_64p[66] = { + 0x1000, 0x0000, + 0x1000, 0x0000, + 0x0FFC, 0x0004, + 0x0FF8, 0x0008, + 0x0FF0, 0x0010, + 0x0FE4, 0x001C, + 0x0FD8, 0x0028, + 0x0FC4, 0x003C, + 0x0FB0, 0x0050, + 0x0F98, 0x0068, + 0x0F7C, 0x0084, + 0x0F58, 0x00A8, + 0x0F34, 0x00CC, + 0x0F08, 0x00F8, + 0x0ED8, 0x0128, + 0x0EA4, 0x015C, + 0x0E68, 0x0198, + 0x0E28, 0x01D8, + 0x0DE4, 0x021C, + 0x0D98, 0x0268, + 0x0D44, 0x02BC, + 0x0CEC, 0x0314, + 0x0C90, 0x0370, + 0x0C2C, 0x03D4, + 0x0BC4, 0x043C, + 0x0B58, 0x04A8, + 0x0AE8, 0x0518, + 0x0A74, 0x058C, + 0x09FC, 0x0604, + 0x0980, 0x0680, + 0x0900, 0x0700, + 0x0880, 0x0780, + 0x0800, 0x0800 +}; + +//========================================= +// <num_taps> = 3 +// <num_phases> = 64 +// <scale_ratio> = 0.83333 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_3tap_64p_upscale[99] = { + 0x0804, 0x07FC, 0x0000, + 0x07A8, 0x0860, 0x3FF8, + 0x0754, 0x08BC, 0x3FF0, + 0x0700, 0x0918, 0x3FE8, + 0x06AC, 0x0978, 0x3FDC, + 0x0654, 0x09D8, 0x3FD4, + 0x0604, 0x0A34, 0x3FC8, + 0x05B0, 0x0A90, 0x3FC0, + 0x055C, 0x0AF0, 0x3FB4, + 0x050C, 0x0B48, 0x3FAC, + 0x04BC, 0x0BA0, 0x3FA4, + 0x0470, 0x0BF4, 0x3F9C, + 0x0420, 0x0C50, 0x3F90, + 0x03D8, 0x0C9C, 0x3F8C, + 0x038C, 0x0CF0, 0x3F84, + 0x0344, 0x0D40, 0x3F7C, + 0x0300, 0x0D88, 0x3F78, + 0x02BC, 0x0DD0, 0x3F74, + 0x027C, 0x0E14, 0x3F70, + 0x023C, 0x0E54, 0x3F70, + 
0x0200, 0x0E90, 0x3F70, + 0x01C8, 0x0EC8, 0x3F70, + 0x0190, 0x0EFC, 0x3F74, + 0x015C, 0x0F2C, 0x3F78, + 0x0128, 0x0F5C, 0x3F7C, + 0x00FC, 0x0F7C, 0x3F88, + 0x00CC, 0x0FA4, 0x3F90, + 0x00A4, 0x0FC0, 0x3F9C, + 0x007C, 0x0FD8, 0x3FAC, + 0x0058, 0x0FE8, 0x3FC0, + 0x0038, 0x0FF4, 0x3FD4, + 0x0018, 0x1000, 0x3FE8, + 0x0000, 0x1000, 0x0000 +}; + +//========================================= +// <num_taps> = 3 +// <num_phases> = 64 +// <scale_ratio> = 1.16666 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_3tap_64p_116[99] = { + 0x0804, 0x07FC, 0x0000, + 0x07C0, 0x0844, 0x3FFC, + 0x0780, 0x0888, 0x3FF8, + 0x0740, 0x08D0, 0x3FF0, + 0x0700, 0x0914, 0x3FEC, + 0x06C0, 0x0958, 0x3FE8, + 0x0684, 0x0998, 0x3FE4, + 0x0644, 0x09DC, 0x3FE0, + 0x0604, 0x0A1C, 0x3FE0, + 0x05C4, 0x0A5C, 0x3FE0, + 0x0588, 0x0A9C, 0x3FDC, + 0x0548, 0x0ADC, 0x3FDC, + 0x050C, 0x0B14, 0x3FE0, + 0x04CC, 0x0B54, 0x3FE0, + 0x0490, 0x0B8C, 0x3FE4, + 0x0458, 0x0BC0, 0x3FE8, + 0x041C, 0x0BF4, 0x3FF0, + 0x03E0, 0x0C28, 0x3FF8, + 0x03A8, 0x0C58, 0x0000, + 0x0374, 0x0C88, 0x0004, + 0x0340, 0x0CB0, 0x0010, + 0x0308, 0x0CD8, 0x0020, + 0x02D8, 0x0CFC, 0x002C, + 0x02A0, 0x0D20, 0x0040, + 0x0274, 0x0D3C, 0x0050, + 0x0244, 0x0D58, 0x0064, + 0x0214, 0x0D70, 0x007C, + 0x01E8, 0x0D84, 0x0094, + 0x01C0, 0x0D94, 0x00AC, + 0x0198, 0x0DA0, 0x00C8, + 0x0170, 0x0DAC, 0x00E4, + 0x014C, 0x0DB0, 0x0104, + 0x0128, 0x0DB4, 0x0124 +}; + +//========================================= +// <num_taps> = 3 +// <num_phases> = 64 +// <scale_ratio> = 1.49999 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_3tap_64p_149[99] = { + 0x0804, 0x07FC, 0x0000, + 0x07CC, 0x0834, 0x0000, + 0x0798, 0x0868, 0x0000, + 0x0764, 0x089C, 0x0000, + 0x0730, 0x08CC, 0x0004, + 0x0700, 0x08FC, 0x0004, + 0x06CC, 0x092C, 0x0008, + 0x0698, 0x095C, 0x000C, + 0x0660, 0x098C, 0x0014, + 0x062C, 0x09B8, 0x001C, + 0x05FC, 0x09E4, 0x0020, + 0x05C4, 0x0A10, 0x002C, + 0x0590, 0x0A3C, 0x0034, + 0x055C, 0x0A64, 0x0040, + 0x0528, 0x0A8C, 0x004C, + 0x04F8, 0x0AB0, 0x0058, + 0x04C4, 0x0AD4, 0x0068, + 0x0490, 0x0AF8, 0x0078, + 0x0460, 0x0B18, 0x0088, + 0x0430, 0x0B38, 0x0098, + 0x0400, 0x0B54, 0x00AC, + 0x03D0, 0x0B6C, 0x00C4, + 0x03A0, 0x0B88, 0x00D8, + 0x0374, 0x0B9C, 0x00F0, + 0x0348, 0x0BB0, 0x0108, + 0x0318, 0x0BC4, 0x0124, + 0x02EC, 0x0BD4, 0x0140, + 0x02C4, 0x0BE0, 0x015C, + 0x029C, 0x0BEC, 0x0178, + 0x0274, 0x0BF4, 0x0198, + 0x024C, 0x0BFC, 0x01B8, + 0x0228, 0x0BFC, 0x01DC, + 0x0200, 0x0C00, 0x0200 +}; + +//========================================= +// <num_taps> = 3 +// <num_phases> = 64 +// <scale_ratio> = 1.83332 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_3tap_64p_183[99] = { + 0x0804, 0x07FC, 0x0000, + 0x07D4, 0x0824, 0x0008, + 0x07AC, 0x0840, 0x0014, + 0x0780, 0x0860, 0x0020, + 0x0754, 0x0880, 0x002C, + 0x0728, 0x089C, 0x003C, + 0x0700, 0x08B8, 0x0048, + 0x06D4, 0x08D4, 0x0058, + 0x06A8, 0x08F0, 0x0068, + 0x067C, 0x090C, 0x0078, + 0x0650, 0x0924, 0x008C, + 0x0628, 0x093C, 0x009C, + 0x05FC, 0x0954, 0x00B0, + 0x05D0, 0x096C, 0x00C4, + 0x05A8, 0x0980, 0x00D8, + 0x0578, 0x0998, 0x00F0, + 0x0550, 0x09AC, 0x0104, + 0x0528, 0x09BC, 0x011C, + 0x04FC, 0x09D0, 0x0134, + 0x04D4, 0x09E0, 0x014C, + 
0x04A8, 0x09F0, 0x0168, + 0x0480, 0x09FC, 0x0184, + 0x045C, 0x0A08, 0x019C, + 0x0434, 0x0A14, 0x01B8, + 0x0408, 0x0A20, 0x01D8, + 0x03E0, 0x0A2C, 0x01F4, + 0x03B8, 0x0A34, 0x0214, + 0x0394, 0x0A38, 0x0234, + 0x036C, 0x0A40, 0x0254, + 0x0348, 0x0A44, 0x0274, + 0x0324, 0x0A48, 0x0294, + 0x0300, 0x0A48, 0x02B8, + 0x02DC, 0x0A48, 0x02DC +}; + +//========================================= +// <num_taps> = 4 +// <num_phases> = 64 +// <scale_ratio> = 0.83333 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_4tap_64p_upscale[132] = { + 0x0000, 0x1000, 0x0000, 0x0000, + 0x3FDC, 0x0FFC, 0x0028, 0x0000, + 0x3FB4, 0x0FF8, 0x0054, 0x0000, + 0x3F94, 0x0FE8, 0x0084, 0x0000, + 0x3F74, 0x0FDC, 0x00B4, 0x3FFC, + 0x3F58, 0x0FC4, 0x00E8, 0x3FFC, + 0x3F3C, 0x0FAC, 0x0120, 0x3FF8, + 0x3F24, 0x0F90, 0x0158, 0x3FF4, + 0x3F0C, 0x0F70, 0x0194, 0x3FF0, + 0x3EF8, 0x0F4C, 0x01D0, 0x3FEC, + 0x3EE8, 0x0F20, 0x0210, 0x3FE8, + 0x3ED8, 0x0EF4, 0x0254, 0x3FE0, + 0x3ECC, 0x0EC4, 0x0298, 0x3FD8, + 0x3EC0, 0x0E90, 0x02DC, 0x3FD4, + 0x3EB8, 0x0E58, 0x0324, 0x3FCC, + 0x3EB0, 0x0E20, 0x036C, 0x3FC4, + 0x3EAC, 0x0DE4, 0x03B8, 0x3FB8, + 0x3EA8, 0x0DA4, 0x0404, 0x3FB0, + 0x3EA4, 0x0D60, 0x0454, 0x3FA8, + 0x3EA4, 0x0D1C, 0x04A4, 0x3F9C, + 0x3EA4, 0x0CD8, 0x04F4, 0x3F90, + 0x3EA8, 0x0C88, 0x0548, 0x3F88, + 0x3EAC, 0x0C3C, 0x059C, 0x3F7C, + 0x3EB0, 0x0BF0, 0x05F0, 0x3F70, + 0x3EB8, 0x0BA0, 0x0644, 0x3F64, + 0x3EBC, 0x0B54, 0x0698, 0x3F58, + 0x3EC4, 0x0B00, 0x06F0, 0x3F4C, + 0x3ECC, 0x0AAC, 0x0748, 0x3F40, + 0x3ED8, 0x0A54, 0x07A0, 0x3F34, + 0x3EE0, 0x0A04, 0x07F8, 0x3F24, + 0x3EEC, 0x09AC, 0x0850, 0x3F18, + 0x3EF8, 0x0954, 0x08A8, 0x3F0C, + 0x3F00, 0x08FC, 0x0900, 0x3F04 +}; + +//========================================= +// <num_taps> = 4 +// <num_phases> = 64 +// <scale_ratio> = 1.16666 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_4tap_64p_116[132] = { + 0x01A8, 0x0CB4, 0x01A4, 0x0000, + 0x017C, 0x0CB8, 0x01D0, 0x3FFC, + 0x0158, 0x0CB8, 0x01F8, 0x3FF8, + 0x0130, 0x0CB4, 0x0228, 0x3FF4, + 0x0110, 0x0CB0, 0x0254, 0x3FEC, + 0x00EC, 0x0CA8, 0x0284, 0x3FE8, + 0x00CC, 0x0C9C, 0x02B4, 0x3FE4, + 0x00AC, 0x0C90, 0x02E8, 0x3FDC, + 0x0090, 0x0C80, 0x031C, 0x3FD4, + 0x0070, 0x0C70, 0x0350, 0x3FD0, + 0x0058, 0x0C5C, 0x0384, 0x3FC8, + 0x003C, 0x0C48, 0x03BC, 0x3FC0, + 0x0024, 0x0C2C, 0x03F4, 0x3FBC, + 0x0010, 0x0C10, 0x042C, 0x3FB4, + 0x3FFC, 0x0BF4, 0x0464, 0x3FAC, + 0x3FE8, 0x0BD4, 0x04A0, 0x3FA4, + 0x3FD8, 0x0BAC, 0x04DC, 0x3FA0, + 0x3FC4, 0x0B8C, 0x0518, 0x3F98, + 0x3FB4, 0x0B68, 0x0554, 0x3F90, + 0x3FA8, 0x0B40, 0x0590, 0x3F88, + 0x3F9C, 0x0B14, 0x05CC, 0x3F84, + 0x3F90, 0x0AEC, 0x0608, 0x3F7C, + 0x3F84, 0x0ABC, 0x0648, 0x3F78, + 0x3F7C, 0x0A90, 0x0684, 0x3F70, + 0x3F70, 0x0A60, 0x06C4, 0x3F6C, + 0x3F6C, 0x0A2C, 0x0700, 0x3F68, + 0x3F64, 0x09F8, 0x0740, 0x3F64, + 0x3F60, 0x09C4, 0x077C, 0x3F60, + 0x3F5C, 0x098C, 0x07BC, 0x3F5C, + 0x3F58, 0x0958, 0x07F8, 0x3F58, + 0x3F58, 0x091C, 0x0834, 0x3F58, + 0x3F54, 0x08E4, 0x0870, 0x3F58, + 0x3F54, 0x08AC, 0x08AC, 0x3F54 +}; + +//========================================= +// <num_taps> = 4 +// <num_phases> = 64 +// <scale_ratio> = 1.49999 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t 
filter_4tap_64p_149[132] = { + 0x02B8, 0x0A90, 0x02B8, 0x0000, + 0x0294, 0x0A94, 0x02DC, 0x3FFC, + 0x0274, 0x0A94, 0x0300, 0x3FF8, + 0x0250, 0x0A94, 0x0328, 0x3FF4, + 0x0230, 0x0A90, 0x0350, 0x3FF0, + 0x0214, 0x0A8C, 0x0374, 0x3FEC, + 0x01F0, 0x0A88, 0x03A0, 0x3FE8, + 0x01D4, 0x0A80, 0x03C8, 0x3FE4, + 0x01B8, 0x0A78, 0x03F0, 0x3FE0, + 0x0198, 0x0A70, 0x041C, 0x3FDC, + 0x0180, 0x0A64, 0x0444, 0x3FD8, + 0x0164, 0x0A54, 0x0470, 0x3FD8, + 0x0148, 0x0A48, 0x049C, 0x3FD4, + 0x0130, 0x0A38, 0x04C8, 0x3FD0, + 0x0118, 0x0A24, 0x04F4, 0x3FD0, + 0x0100, 0x0A14, 0x0520, 0x3FCC, + 0x00E8, 0x0A00, 0x054C, 0x3FCC, + 0x00D4, 0x09E8, 0x057C, 0x3FC8, + 0x00C0, 0x09D0, 0x05A8, 0x3FC8, + 0x00AC, 0x09B8, 0x05D4, 0x3FC8, + 0x0098, 0x09A0, 0x0600, 0x3FC8, + 0x0084, 0x0984, 0x0630, 0x3FC8, + 0x0074, 0x0964, 0x065C, 0x3FCC, + 0x0064, 0x0948, 0x0688, 0x3FCC, + 0x0054, 0x0928, 0x06B4, 0x3FD0, + 0x0044, 0x0908, 0x06E0, 0x3FD4, + 0x0038, 0x08E8, 0x070C, 0x3FD4, + 0x002C, 0x08C4, 0x0738, 0x3FD8, + 0x001C, 0x08A4, 0x0760, 0x3FE0, + 0x0014, 0x087C, 0x078C, 0x3FE4, + 0x0008, 0x0858, 0x07B4, 0x3FEC, + 0x0000, 0x0830, 0x07DC, 0x3FF4, + 0x3FFC, 0x0804, 0x0804, 0x3FFC +}; + +//========================================= +// <num_taps> = 4 +// <num_phases> = 64 +// <scale_ratio> = 1.83332 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_4tap_64p_183[132] = { + 0x03B0, 0x08A0, 0x03B0, 0x0000, + 0x0394, 0x08A0, 0x03CC, 0x0000, + 0x037C, 0x089C, 0x03E8, 0x0000, + 0x0360, 0x089C, 0x0400, 0x0004, + 0x0348, 0x0898, 0x041C, 0x0004, + 0x032C, 0x0894, 0x0438, 0x0008, + 0x0310, 0x0890, 0x0454, 0x000C, + 0x02F8, 0x0888, 0x0474, 0x000C, + 0x02DC, 0x0884, 0x0490, 0x0010, + 0x02C4, 0x087C, 0x04AC, 0x0014, + 0x02AC, 0x0874, 0x04C8, 0x0018, + 0x0290, 0x086C, 0x04E4, 0x0020, + 0x0278, 0x0864, 0x0500, 0x0024, + 0x0264, 0x0858, 0x051C, 0x0028, + 0x024C, 0x084C, 0x0538, 0x0030, + 0x0234, 0x0844, 0x0554, 0x0034, + 0x021C, 0x0838, 0x0570, 0x003C, + 0x0208, 0x0828, 0x058C, 0x0044, + 0x01F0, 0x081C, 0x05A8, 0x004C, + 0x01DC, 0x080C, 0x05C4, 0x0054, + 0x01C8, 0x07FC, 0x05E0, 0x005C, + 0x01B4, 0x07EC, 0x05FC, 0x0064, + 0x019C, 0x07DC, 0x0618, 0x0070, + 0x018C, 0x07CC, 0x0630, 0x0078, + 0x0178, 0x07B8, 0x064C, 0x0084, + 0x0164, 0x07A8, 0x0664, 0x0090, + 0x0150, 0x0794, 0x0680, 0x009C, + 0x0140, 0x0780, 0x0698, 0x00A8, + 0x0130, 0x076C, 0x06B0, 0x00B4, + 0x0120, 0x0758, 0x06C8, 0x00C0, + 0x0110, 0x0740, 0x06E0, 0x00D0, + 0x0100, 0x072C, 0x06F8, 0x00DC, + 0x00F0, 0x0714, 0x0710, 0x00EC +}; + +//========================================= +// <num_taps> = 5 +// <num_phases> = 64 +// <scale_ratio> = 0.83333 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_5tap_64p_upscale[165] = { + 0x3E40, 0x09C0, 0x09C0, 0x3E40, 0x0000, + 0x3E50, 0x0964, 0x0A18, 0x3E34, 0x0000, + 0x3E5C, 0x0908, 0x0A6C, 0x3E2C, 0x0004, + 0x3E6C, 0x08AC, 0x0AC0, 0x3E20, 0x0008, + 0x3E78, 0x0850, 0x0B14, 0x3E18, 0x000C, + 0x3E88, 0x07F4, 0x0B60, 0x3E14, 0x0010, + 0x3E98, 0x0798, 0x0BB0, 0x3E0C, 0x0014, + 0x3EA8, 0x073C, 0x0C00, 0x3E08, 0x0014, + 0x3EB8, 0x06E4, 0x0C48, 0x3E04, 0x0018, + 0x3ECC, 0x0684, 0x0C90, 0x3E04, 0x001C, + 0x3EDC, 0x062C, 0x0CD4, 0x3E04, 0x0020, + 0x3EEC, 0x05D4, 0x0D1C, 0x3E04, 0x0020, + 0x3EFC, 0x057C, 0x0D5C, 0x3E08, 0x0024, + 0x3F0C, 0x0524, 0x0D98, 0x3E10, 0x0028, + 0x3F20, 0x04CC, 0x0DD8, 0x3E14, 
0x0028, + 0x3F30, 0x0478, 0x0E14, 0x3E1C, 0x0028, + 0x3F40, 0x0424, 0x0E48, 0x3E28, 0x002C, + 0x3F50, 0x03D4, 0x0E7C, 0x3E34, 0x002C, + 0x3F60, 0x0384, 0x0EAC, 0x3E44, 0x002C, + 0x3F6C, 0x0338, 0x0EDC, 0x3E54, 0x002C, + 0x3F7C, 0x02E8, 0x0F08, 0x3E68, 0x002C, + 0x3F8C, 0x02A0, 0x0F2C, 0x3E7C, 0x002C, + 0x3F98, 0x0258, 0x0F50, 0x3E94, 0x002C, + 0x3FA4, 0x0210, 0x0F74, 0x3EB0, 0x0028, + 0x3FB0, 0x01CC, 0x0F90, 0x3ECC, 0x0028, + 0x3FC0, 0x018C, 0x0FA8, 0x3EE8, 0x0024, + 0x3FC8, 0x014C, 0x0FC0, 0x3F0C, 0x0020, + 0x3FD4, 0x0110, 0x0FD4, 0x3F2C, 0x001C, + 0x3FE0, 0x00D4, 0x0FE0, 0x3F54, 0x0018, + 0x3FE8, 0x009C, 0x0FF0, 0x3F7C, 0x0010, + 0x3FF0, 0x0064, 0x0FFC, 0x3FA4, 0x000C, + 0x3FFC, 0x0030, 0x0FFC, 0x3FD4, 0x0004, + 0x0000, 0x0000, 0x1000, 0x0000, 0x0000 +}; + +//========================================= +// <num_taps> = 5 +// <num_phases> = 64 +// <scale_ratio> = 1.16666 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_5tap_64p_116[165] = { + 0x3EDC, 0x0924, 0x0924, 0x3EDC, 0x0000, + 0x3ED8, 0x08EC, 0x095C, 0x3EE0, 0x0000, + 0x3ED4, 0x08B0, 0x0994, 0x3EE8, 0x0000, + 0x3ED0, 0x0878, 0x09C8, 0x3EF0, 0x0000, + 0x3ED0, 0x083C, 0x09FC, 0x3EF8, 0x0000, + 0x3ED0, 0x0800, 0x0A2C, 0x3F04, 0x0000, + 0x3ED0, 0x07C4, 0x0A5C, 0x3F10, 0x0000, + 0x3ED0, 0x0788, 0x0A8C, 0x3F1C, 0x0000, + 0x3ED0, 0x074C, 0x0AC0, 0x3F28, 0x3FFC, + 0x3ED4, 0x0710, 0x0AE8, 0x3F38, 0x3FFC, + 0x3ED8, 0x06D0, 0x0B18, 0x3F48, 0x3FF8, + 0x3EDC, 0x0694, 0x0B3C, 0x3F5C, 0x3FF8, + 0x3EE0, 0x0658, 0x0B68, 0x3F6C, 0x3FF4, + 0x3EE4, 0x061C, 0x0B90, 0x3F80, 0x3FF0, + 0x3EEC, 0x05DC, 0x0BB4, 0x3F98, 0x3FEC, + 0x3EF0, 0x05A0, 0x0BD8, 0x3FB0, 0x3FE8, + 0x3EF8, 0x0564, 0x0BF8, 0x3FC8, 0x3FE4, + 0x3EFC, 0x0528, 0x0C1C, 0x3FE0, 0x3FE0, + 0x3F04, 0x04EC, 0x0C38, 0x3FFC, 0x3FDC, + 0x3F0C, 0x04B4, 0x0C54, 0x0014, 0x3FD8, + 0x3F14, 0x047C, 0x0C70, 0x0030, 0x3FD0, + 0x3F1C, 0x0440, 0x0C88, 0x0050, 0x3FCC, + 0x3F24, 0x0408, 0x0CA0, 0x0070, 0x3FC4, + 0x3F2C, 0x03D0, 0x0CB0, 0x0094, 0x3FC0, + 0x3F34, 0x0398, 0x0CC4, 0x00B8, 0x3FB8, + 0x3F3C, 0x0364, 0x0CD4, 0x00DC, 0x3FB0, + 0x3F48, 0x032C, 0x0CE0, 0x0100, 0x3FAC, + 0x3F50, 0x02F8, 0x0CEC, 0x0128, 0x3FA4, + 0x3F58, 0x02C4, 0x0CF8, 0x0150, 0x3F9C, + 0x3F60, 0x0290, 0x0D00, 0x017C, 0x3F94, + 0x3F68, 0x0260, 0x0D04, 0x01A8, 0x3F8C, + 0x3F74, 0x0230, 0x0D04, 0x01D4, 0x3F84, + 0x3F7C, 0x0200, 0x0D08, 0x0200, 0x3F7C +}; + +//========================================= +// <num_taps> = 5 +// <num_phases> = 64 +// <scale_ratio> = 1.49999 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_5tap_64p_149[165] = { + 0x3FF4, 0x080C, 0x080C, 0x3FF4, 0x0000, + 0x3FE8, 0x07E8, 0x0830, 0x0000, 0x0000, + 0x3FDC, 0x07C8, 0x0850, 0x0010, 0x3FFC, + 0x3FD0, 0x07A4, 0x0878, 0x001C, 0x3FF8, + 0x3FC4, 0x0780, 0x0898, 0x0030, 0x3FF4, + 0x3FB8, 0x075C, 0x08B8, 0x0040, 0x3FF4, + 0x3FB0, 0x0738, 0x08D8, 0x0050, 0x3FF0, + 0x3FA8, 0x0710, 0x08F8, 0x0064, 0x3FEC, + 0x3FA0, 0x06EC, 0x0914, 0x0078, 0x3FE8, + 0x3F98, 0x06C4, 0x0934, 0x008C, 0x3FE4, + 0x3F90, 0x06A0, 0x094C, 0x00A4, 0x3FE0, + 0x3F8C, 0x0678, 0x0968, 0x00B8, 0x3FDC, + 0x3F84, 0x0650, 0x0984, 0x00D0, 0x3FD8, + 0x3F80, 0x0628, 0x099C, 0x00E8, 0x3FD4, + 0x3F7C, 0x0600, 0x09B8, 0x0100, 0x3FCC, + 0x3F78, 0x05D8, 0x09D0, 0x0118, 0x3FC8, + 0x3F74, 0x05B0, 0x09E4, 0x0134, 0x3FC4, + 0x3F70, 0x0588, 0x09F8, 0x0150, 
0x3FC0, + 0x3F70, 0x0560, 0x0A08, 0x016C, 0x3FBC, + 0x3F6C, 0x0538, 0x0A20, 0x0188, 0x3FB4, + 0x3F6C, 0x0510, 0x0A30, 0x01A4, 0x3FB0, + 0x3F6C, 0x04E8, 0x0A3C, 0x01C4, 0x3FAC, + 0x3F6C, 0x04C0, 0x0A48, 0x01E4, 0x3FA8, + 0x3F6C, 0x0498, 0x0A58, 0x0200, 0x3FA4, + 0x3F6C, 0x0470, 0x0A60, 0x0224, 0x3FA0, + 0x3F6C, 0x0448, 0x0A70, 0x0244, 0x3F98, + 0x3F70, 0x0420, 0x0A78, 0x0264, 0x3F94, + 0x3F70, 0x03F8, 0x0A80, 0x0288, 0x3F90, + 0x3F74, 0x03D4, 0x0A84, 0x02A8, 0x3F8C, + 0x3F74, 0x03AC, 0x0A8C, 0x02CC, 0x3F88, + 0x3F78, 0x0384, 0x0A90, 0x02F0, 0x3F84, + 0x3F7C, 0x0360, 0x0A90, 0x0314, 0x3F80, + 0x3F7C, 0x033C, 0x0A90, 0x033C, 0x3F7C +}; + +//========================================= +// <num_taps> = 5 +// <num_phases> = 64 +// <scale_ratio> = 1.83332 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_5tap_64p_183[165] = { + 0x0168, 0x069C, 0x0698, 0x0164, 0x0000, + 0x0154, 0x068C, 0x06AC, 0x0174, 0x0000, + 0x0144, 0x0674, 0x06C0, 0x0188, 0x0000, + 0x0138, 0x0664, 0x06D0, 0x0198, 0x3FFC, + 0x0128, 0x0654, 0x06E0, 0x01A8, 0x3FFC, + 0x0118, 0x0640, 0x06F0, 0x01BC, 0x3FFC, + 0x010C, 0x0630, 0x0700, 0x01CC, 0x3FF8, + 0x00FC, 0x061C, 0x0710, 0x01E0, 0x3FF8, + 0x00F0, 0x060C, 0x071C, 0x01F0, 0x3FF8, + 0x00E4, 0x05F4, 0x072C, 0x0204, 0x3FF8, + 0x00D8, 0x05E4, 0x0738, 0x0218, 0x3FF4, + 0x00CC, 0x05D0, 0x0744, 0x022C, 0x3FF4, + 0x00C0, 0x05B8, 0x0754, 0x0240, 0x3FF4, + 0x00B4, 0x05A4, 0x0760, 0x0254, 0x3FF4, + 0x00A8, 0x0590, 0x076C, 0x0268, 0x3FF4, + 0x009C, 0x057C, 0x0778, 0x027C, 0x3FF4, + 0x0094, 0x0564, 0x0780, 0x0294, 0x3FF4, + 0x0088, 0x0550, 0x0788, 0x02A8, 0x3FF8, + 0x0080, 0x0538, 0x0794, 0x02BC, 0x3FF8, + 0x0074, 0x0524, 0x079C, 0x02D4, 0x3FF8, + 0x006C, 0x0510, 0x07A4, 0x02E8, 0x3FF8, + 0x0064, 0x04F4, 0x07AC, 0x0300, 0x3FFC, + 0x005C, 0x04E4, 0x07B0, 0x0314, 0x3FFC, + 0x0054, 0x04C8, 0x07B8, 0x032C, 0x0000, + 0x004C, 0x04B4, 0x07C0, 0x0340, 0x0000, + 0x0044, 0x04A0, 0x07C4, 0x0358, 0x0000, + 0x003C, 0x0488, 0x07C8, 0x0370, 0x0004, + 0x0038, 0x0470, 0x07CC, 0x0384, 0x0008, + 0x0030, 0x045C, 0x07D0, 0x039C, 0x0008, + 0x002C, 0x0444, 0x07D0, 0x03B4, 0x000C, + 0x0024, 0x042C, 0x07D4, 0x03CC, 0x0010, + 0x0020, 0x0414, 0x07D4, 0x03E0, 0x0018, + 0x001C, 0x03FC, 0x07D4, 0x03F8, 0x001C +}; + +//========================================= +// <num_taps> = 6 +// <num_phases> = 64 +// <scale_ratio> = 0.83333 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_6tap_64p_upscale[198] = { + 0x0000, 0x0000, 0x1000, 0x0000, 0x0000, 0x0000, + 0x000C, 0x3FD0, 0x0FFC, 0x0034, 0x3FF4, 0x0000, + 0x0018, 0x3F9C, 0x0FF8, 0x006C, 0x3FE8, 0x0000, + 0x0024, 0x3F6C, 0x0FF0, 0x00A8, 0x3FD8, 0x0000, + 0x002C, 0x3F44, 0x0FE4, 0x00E4, 0x3FC8, 0x0000, + 0x0038, 0x3F18, 0x0FD4, 0x0124, 0x3FB8, 0x0000, + 0x0040, 0x3EF0, 0x0FC0, 0x0164, 0x3FA8, 0x0004, + 0x0048, 0x3EC8, 0x0FAC, 0x01A8, 0x3F98, 0x0004, + 0x0050, 0x3EA8, 0x0F94, 0x01EC, 0x3F84, 0x0004, + 0x0058, 0x3E84, 0x0F74, 0x0234, 0x3F74, 0x0008, + 0x0060, 0x3E68, 0x0F54, 0x027C, 0x3F60, 0x0008, + 0x0064, 0x3E4C, 0x0F30, 0x02C8, 0x3F4C, 0x000C, + 0x006C, 0x3E30, 0x0F04, 0x0314, 0x3F3C, 0x0010, + 0x0070, 0x3E18, 0x0EDC, 0x0360, 0x3F28, 0x0014, + 0x0074, 0x3E04, 0x0EB0, 0x03B0, 0x3F14, 0x0014, + 0x0078, 0x3DF0, 0x0E80, 0x0400, 0x3F00, 0x0018, + 0x0078, 0x3DE0, 0x0E4C, 0x0454, 0x3EEC, 0x001C, + 0x007C, 0x3DD0, 
0x0E14, 0x04A8, 0x3ED8, 0x0020, + 0x007C, 0x3DC4, 0x0DDC, 0x04FC, 0x3EC4, 0x0024, + 0x007C, 0x3DBC, 0x0DA0, 0x0550, 0x3EB0, 0x0028, + 0x0080, 0x3DB4, 0x0D5C, 0x05A8, 0x3E9C, 0x002C, + 0x0080, 0x3DAC, 0x0D1C, 0x0600, 0x3E88, 0x0030, + 0x007C, 0x3DA8, 0x0CDC, 0x0658, 0x3E74, 0x0034, + 0x007C, 0x3DA4, 0x0C94, 0x06B0, 0x3E64, 0x0038, + 0x007C, 0x3DA4, 0x0C48, 0x0708, 0x3E50, 0x0040, + 0x0078, 0x3DA4, 0x0C00, 0x0760, 0x3E40, 0x0044, + 0x0078, 0x3DA8, 0x0BB4, 0x07B8, 0x3E2C, 0x0048, + 0x0074, 0x3DAC, 0x0B68, 0x0810, 0x3E1C, 0x004C, + 0x0070, 0x3DB4, 0x0B18, 0x0868, 0x3E0C, 0x0050, + 0x006C, 0x3DBC, 0x0AC4, 0x08C4, 0x3DFC, 0x0054, + 0x0068, 0x3DC4, 0x0A74, 0x0918, 0x3DF0, 0x0058, + 0x0068, 0x3DCC, 0x0A20, 0x0970, 0x3DE0, 0x005C, + 0x0064, 0x3DD4, 0x09C8, 0x09C8, 0x3DD4, 0x0064 +}; + +//========================================= +// <num_taps> = 6 +// <num_phases> = 64 +// <scale_ratio> = 1.16666 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_6tap_64p_116[198] = { + 0x3F0C, 0x0240, 0x0D68, 0x0240, 0x3F0C, 0x0000, + 0x3F18, 0x0210, 0x0D64, 0x0274, 0x3F00, 0x0000, + 0x3F24, 0x01E0, 0x0D58, 0x02A8, 0x3EF8, 0x0004, + 0x3F2C, 0x01B0, 0x0D58, 0x02DC, 0x3EEC, 0x0004, + 0x3F38, 0x0180, 0x0D50, 0x0310, 0x3EE0, 0x0008, + 0x3F44, 0x0154, 0x0D40, 0x0348, 0x3ED8, 0x0008, + 0x3F50, 0x0128, 0x0D34, 0x037C, 0x3ECC, 0x000C, + 0x3F5C, 0x00FC, 0x0D20, 0x03B4, 0x3EC4, 0x0010, + 0x3F64, 0x00D4, 0x0D14, 0x03EC, 0x3EB8, 0x0010, + 0x3F70, 0x00AC, 0x0CFC, 0x0424, 0x3EB0, 0x0014, + 0x3F78, 0x0084, 0x0CE8, 0x0460, 0x3EA8, 0x0014, + 0x3F84, 0x0060, 0x0CCC, 0x0498, 0x3EA0, 0x0018, + 0x3F90, 0x003C, 0x0CB4, 0x04D0, 0x3E98, 0x0018, + 0x3F98, 0x0018, 0x0C9C, 0x050C, 0x3E90, 0x0018, + 0x3FA0, 0x3FFC, 0x0C78, 0x0548, 0x3E88, 0x001C, + 0x3FAC, 0x3FDC, 0x0C54, 0x0584, 0x3E84, 0x001C, + 0x3FB4, 0x3FBC, 0x0C3C, 0x05BC, 0x3E7C, 0x001C, + 0x3FBC, 0x3FA0, 0x0C14, 0x05F8, 0x3E78, 0x0020, + 0x3FC4, 0x3F84, 0x0BF0, 0x0634, 0x3E74, 0x0020, + 0x3FCC, 0x3F68, 0x0BCC, 0x0670, 0x3E70, 0x0020, + 0x3FD4, 0x3F50, 0x0BA4, 0x06AC, 0x3E6C, 0x0020, + 0x3FDC, 0x3F38, 0x0B78, 0x06E8, 0x3E6C, 0x0020, + 0x3FE0, 0x3F24, 0x0B50, 0x0724, 0x3E68, 0x0020, + 0x3FE8, 0x3F0C, 0x0B24, 0x0760, 0x3E68, 0x0020, + 0x3FF0, 0x3EFC, 0x0AF4, 0x0798, 0x3E68, 0x0020, + 0x3FF4, 0x3EE8, 0x0AC8, 0x07D4, 0x3E68, 0x0020, + 0x3FFC, 0x3ED8, 0x0A94, 0x0810, 0x3E6C, 0x001C, + 0x0000, 0x3EC8, 0x0A64, 0x0848, 0x3E70, 0x001C, + 0x0000, 0x3EB8, 0x0A38, 0x0880, 0x3E74, 0x001C, + 0x0004, 0x3EAC, 0x0A04, 0x08BC, 0x3E78, 0x0018, + 0x0008, 0x3EA4, 0x09D0, 0x08F4, 0x3E7C, 0x0014, + 0x000C, 0x3E98, 0x0998, 0x092C, 0x3E84, 0x0014, + 0x0010, 0x3E90, 0x0964, 0x0960, 0x3E8C, 0x0010 +}; + +//========================================= +// <num_taps> = 6 +// <num_phases> = 64 +// <scale_ratio> = 1.49999 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_6tap_64p_149[198] = { + 0x3F14, 0x0394, 0x0AB0, 0x0394, 0x3F14, 0x0000, + 0x3F18, 0x036C, 0x0AB0, 0x03B8, 0x3F14, 0x0000, + 0x3F18, 0x0348, 0x0AAC, 0x03E0, 0x3F14, 0x0000, + 0x3F1C, 0x0320, 0x0AAC, 0x0408, 0x3F10, 0x0000, + 0x3F20, 0x02FC, 0x0AA8, 0x042C, 0x3F10, 0x0000, + 0x3F24, 0x02D8, 0x0AA0, 0x0454, 0x3F10, 0x0000, + 0x3F28, 0x02B4, 0x0A98, 0x047C, 0x3F10, 0x0000, + 0x3F28, 0x0290, 0x0A90, 0x04A4, 0x3F14, 0x0000, + 0x3F30, 0x026C, 0x0A84, 0x04CC, 0x3F14, 0x0000, + 0x3F34, 
0x024C, 0x0A7C, 0x04F4, 0x3F14, 0x3FFC, + 0x3F38, 0x0228, 0x0A70, 0x051C, 0x3F18, 0x3FFC, + 0x3F3C, 0x0208, 0x0A64, 0x0544, 0x3F1C, 0x3FF8, + 0x3F40, 0x01E8, 0x0A54, 0x056C, 0x3F20, 0x3FF8, + 0x3F44, 0x01C8, 0x0A48, 0x0594, 0x3F24, 0x3FF4, + 0x3F4C, 0x01A8, 0x0A34, 0x05BC, 0x3F28, 0x3FF4, + 0x3F50, 0x0188, 0x0A28, 0x05E4, 0x3F2C, 0x3FF0, + 0x3F54, 0x016C, 0x0A10, 0x060C, 0x3F34, 0x3FF0, + 0x3F5C, 0x014C, 0x09FC, 0x0634, 0x3F3C, 0x3FEC, + 0x3F60, 0x0130, 0x09EC, 0x065C, 0x3F40, 0x3FE8, + 0x3F68, 0x0114, 0x09D0, 0x0684, 0x3F48, 0x3FE8, + 0x3F6C, 0x00F8, 0x09B8, 0x06AC, 0x3F54, 0x3FE4, + 0x3F74, 0x00E0, 0x09A0, 0x06D0, 0x3F5C, 0x3FE0, + 0x3F78, 0x00C4, 0x098C, 0x06F8, 0x3F64, 0x3FDC, + 0x3F7C, 0x00AC, 0x0970, 0x0720, 0x3F70, 0x3FD8, + 0x3F84, 0x0094, 0x0954, 0x0744, 0x3F7C, 0x3FD4, + 0x3F88, 0x007C, 0x093C, 0x0768, 0x3F88, 0x3FD0, + 0x3F90, 0x0064, 0x091C, 0x0790, 0x3F94, 0x3FCC, + 0x3F94, 0x0050, 0x08FC, 0x07B4, 0x3FA4, 0x3FC8, + 0x3F98, 0x003C, 0x08E0, 0x07D8, 0x3FB0, 0x3FC4, + 0x3FA0, 0x0024, 0x08C0, 0x07FC, 0x3FC0, 0x3FC0, + 0x3FA4, 0x0014, 0x08A4, 0x081C, 0x3FD0, 0x3FB8, + 0x3FAC, 0x0000, 0x0880, 0x0840, 0x3FE0, 0x3FB4, + 0x3FB0, 0x3FF0, 0x0860, 0x0860, 0x3FF0, 0x3FB0 +}; + +//========================================= +// <num_taps> = 6 +// <num_phases> = 64 +// <scale_ratio> = 1.83332 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_6tap_64p_183[198] = { + 0x002C, 0x0420, 0x076C, 0x041C, 0x002C, 0x0000, + 0x0028, 0x040C, 0x0768, 0x0430, 0x0034, 0x0000, + 0x0020, 0x03F8, 0x0768, 0x0448, 0x003C, 0x3FFC, + 0x0018, 0x03E4, 0x0768, 0x045C, 0x0044, 0x3FFC, + 0x0014, 0x03D0, 0x0768, 0x0470, 0x004C, 0x3FF8, + 0x000C, 0x03BC, 0x0764, 0x0484, 0x0058, 0x3FF8, + 0x0008, 0x03A4, 0x0764, 0x049C, 0x0060, 0x3FF4, + 0x0004, 0x0390, 0x0760, 0x04B0, 0x0068, 0x3FF4, + 0x0000, 0x037C, 0x0760, 0x04C4, 0x0070, 0x3FF0, + 0x3FFC, 0x0364, 0x075C, 0x04D8, 0x007C, 0x3FF0, + 0x3FF8, 0x0350, 0x0758, 0x04F0, 0x0084, 0x3FEC, + 0x3FF4, 0x033C, 0x0750, 0x0504, 0x0090, 0x3FEC, + 0x3FF0, 0x0328, 0x074C, 0x0518, 0x009C, 0x3FE8, + 0x3FEC, 0x0314, 0x0744, 0x052C, 0x00A8, 0x3FE8, + 0x3FE8, 0x0304, 0x0740, 0x0540, 0x00B0, 0x3FE4, + 0x3FE4, 0x02EC, 0x073C, 0x0554, 0x00BC, 0x3FE4, + 0x3FE0, 0x02DC, 0x0734, 0x0568, 0x00C8, 0x3FE0, + 0x3FE0, 0x02C4, 0x072C, 0x057C, 0x00D4, 0x3FE0, + 0x3FDC, 0x02B4, 0x0724, 0x058C, 0x00E4, 0x3FDC, + 0x3FDC, 0x02A0, 0x0718, 0x05A0, 0x00F0, 0x3FDC, + 0x3FD8, 0x028C, 0x0714, 0x05B4, 0x00FC, 0x3FD8, + 0x3FD8, 0x0278, 0x0704, 0x05C8, 0x010C, 0x3FD8, + 0x3FD4, 0x0264, 0x0700, 0x05D8, 0x0118, 0x3FD8, + 0x3FD4, 0x0254, 0x06F0, 0x05EC, 0x0128, 0x3FD4, + 0x3FD0, 0x0244, 0x06E8, 0x05FC, 0x0134, 0x3FD4, + 0x3FD0, 0x0230, 0x06DC, 0x060C, 0x0144, 0x3FD4, + 0x3FD0, 0x021C, 0x06D0, 0x0620, 0x0154, 0x3FD0, + 0x3FD0, 0x0208, 0x06C4, 0x0630, 0x0164, 0x3FD0, + 0x3FD0, 0x01F8, 0x06B8, 0x0640, 0x0170, 0x3FD0, + 0x3FCC, 0x01E8, 0x06AC, 0x0650, 0x0180, 0x3FD0, + 0x3FCC, 0x01D8, 0x069C, 0x0660, 0x0190, 0x3FD0, + 0x3FCC, 0x01C4, 0x068C, 0x0670, 0x01A4, 0x3FD0, + 0x3FCC, 0x01B8, 0x0680, 0x067C, 0x01B4, 0x3FCC +}; + +//========================================= +// <num_taps> = 7 +// <num_phases> = 64 +// <scale_ratio> = 0.83333 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_7tap_64p_upscale[231] = { + 0x00B0, 0x3D98, 0x09BC, 0x09B8, 0x3D94, 0x00B0, 
0x0000, + 0x00AC, 0x3DA0, 0x0968, 0x0A10, 0x3D88, 0x00B4, 0x0000, + 0x00A8, 0x3DAC, 0x0914, 0x0A60, 0x3D80, 0x00B8, 0x0000, + 0x00A4, 0x3DB8, 0x08C0, 0x0AB4, 0x3D78, 0x00BC, 0x3FFC, + 0x00A0, 0x3DC8, 0x0868, 0x0B00, 0x3D74, 0x00C0, 0x3FFC, + 0x0098, 0x3DD8, 0x0818, 0x0B54, 0x3D6C, 0x00C0, 0x3FF8, + 0x0094, 0x3DE8, 0x07C0, 0x0B9C, 0x3D6C, 0x00C4, 0x3FF8, + 0x008C, 0x3DFC, 0x0768, 0x0BEC, 0x3D68, 0x00C4, 0x3FF8, + 0x0088, 0x3E0C, 0x0714, 0x0C38, 0x3D68, 0x00C4, 0x3FF4, + 0x0080, 0x3E20, 0x06BC, 0x0C80, 0x3D6C, 0x00C4, 0x3FF4, + 0x0078, 0x3E34, 0x0668, 0x0CC4, 0x3D70, 0x00C4, 0x3FF4, + 0x0074, 0x3E48, 0x0610, 0x0D08, 0x3D78, 0x00C4, 0x3FF0, + 0x006C, 0x3E5C, 0x05BC, 0x0D48, 0x3D80, 0x00C4, 0x3FF0, + 0x0068, 0x3E74, 0x0568, 0x0D84, 0x3D88, 0x00C0, 0x3FF0, + 0x0060, 0x3E88, 0x0514, 0x0DC8, 0x3D94, 0x00BC, 0x3FEC, + 0x0058, 0x3E9C, 0x04C0, 0x0E04, 0x3DA4, 0x00B8, 0x3FEC, + 0x0054, 0x3EB4, 0x046C, 0x0E38, 0x3DB4, 0x00B4, 0x3FEC, + 0x004C, 0x3ECC, 0x0418, 0x0E6C, 0x3DC8, 0x00B0, 0x3FEC, + 0x0044, 0x3EE0, 0x03C8, 0x0EA4, 0x3DDC, 0x00A8, 0x3FEC, + 0x0040, 0x3EF8, 0x0378, 0x0ED0, 0x3DF4, 0x00A0, 0x3FEC, + 0x0038, 0x3F0C, 0x032C, 0x0EFC, 0x3E10, 0x0098, 0x3FEC, + 0x0034, 0x3F24, 0x02DC, 0x0F24, 0x3E2C, 0x0090, 0x3FEC, + 0x002C, 0x3F38, 0x0294, 0x0F4C, 0x3E48, 0x0088, 0x3FEC, + 0x0028, 0x3F50, 0x0248, 0x0F68, 0x3E6C, 0x007C, 0x3FF0, + 0x0020, 0x3F64, 0x0200, 0x0F88, 0x3E90, 0x0074, 0x3FF0, + 0x001C, 0x3F7C, 0x01B8, 0x0FA4, 0x3EB4, 0x0068, 0x3FF0, + 0x0018, 0x3F90, 0x0174, 0x0FBC, 0x3EDC, 0x0058, 0x3FF4, + 0x0014, 0x3FA4, 0x0130, 0x0FD0, 0x3F08, 0x004C, 0x3FF4, + 0x000C, 0x3FB8, 0x00F0, 0x0FE4, 0x3F34, 0x003C, 0x3FF8, + 0x0008, 0x3FCC, 0x00B0, 0x0FF0, 0x3F64, 0x0030, 0x3FF8, + 0x0004, 0x3FDC, 0x0070, 0x0FFC, 0x3F98, 0x0020, 0x3FFC, + 0x0000, 0x3FF0, 0x0038, 0x0FFC, 0x3FCC, 0x0010, 0x0000, + 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000, 0x0000 +}; + +//========================================= +// <num_taps> = 7 +// <num_phases> = 64 +// <scale_ratio> = 1.16666 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_7tap_64p_116[231] = { + 0x0020, 0x3E58, 0x0988, 0x0988, 0x3E58, 0x0020, 0x0000, + 0x0024, 0x3E4C, 0x0954, 0x09C0, 0x3E64, 0x0018, 0x0000, + 0x002C, 0x3E44, 0x091C, 0x09F4, 0x3E70, 0x0010, 0x0000, + 0x0030, 0x3E3C, 0x08E8, 0x0A24, 0x3E80, 0x0008, 0x0000, + 0x0034, 0x3E34, 0x08AC, 0x0A5C, 0x3E90, 0x0000, 0x0000, + 0x003C, 0x3E30, 0x0870, 0x0A84, 0x3EA0, 0x3FFC, 0x0004, + 0x0040, 0x3E28, 0x0838, 0x0AB4, 0x3EB4, 0x3FF4, 0x0004, + 0x0044, 0x3E24, 0x07FC, 0x0AE4, 0x3EC8, 0x3FEC, 0x0004, + 0x0048, 0x3E24, 0x07C4, 0x0B08, 0x3EDC, 0x3FE4, 0x0008, + 0x0048, 0x3E20, 0x0788, 0x0B3C, 0x3EF4, 0x3FD8, 0x0008, + 0x004C, 0x3E20, 0x074C, 0x0B60, 0x3F0C, 0x3FD0, 0x000C, + 0x0050, 0x3E20, 0x0710, 0x0B8C, 0x3F24, 0x3FC4, 0x000C, + 0x0050, 0x3E20, 0x06D4, 0x0BB0, 0x3F40, 0x3FBC, 0x0010, + 0x0054, 0x3E24, 0x0698, 0x0BD4, 0x3F5C, 0x3FB0, 0x0010, + 0x0054, 0x3E24, 0x065C, 0x0BFC, 0x3F78, 0x3FA4, 0x0014, + 0x0054, 0x3E28, 0x0624, 0x0C1C, 0x3F98, 0x3F98, 0x0014, + 0x0058, 0x3E2C, 0x05E4, 0x0C3C, 0x3FB8, 0x3F8C, 0x0018, + 0x0058, 0x3E34, 0x05A8, 0x0C58, 0x3FD8, 0x3F80, 0x001C, + 0x0058, 0x3E38, 0x0570, 0x0C78, 0x3FF8, 0x3F74, 0x001C, + 0x0058, 0x3E40, 0x0534, 0x0C94, 0x0018, 0x3F68, 0x0020, + 0x0058, 0x3E48, 0x04F4, 0x0CAC, 0x0040, 0x3F5C, 0x0024, + 0x0058, 0x3E50, 0x04BC, 0x0CC4, 0x0064, 0x3F50, 0x0024, + 0x0054, 0x3E58, 0x0484, 0x0CD8, 0x008C, 0x3F44, 0x0028, + 0x0054, 0x3E60, 0x0448, 
0x0CEC, 0x00B4, 0x3F38, 0x002C, + 0x0054, 0x3E68, 0x0410, 0x0CFC, 0x00E0, 0x3F28, 0x0030, + 0x0054, 0x3E74, 0x03D4, 0x0D0C, 0x010C, 0x3F1C, 0x0030, + 0x0050, 0x3E7C, 0x03A0, 0x0D18, 0x0138, 0x3F10, 0x0034, + 0x0050, 0x3E88, 0x0364, 0x0D24, 0x0164, 0x3F04, 0x0038, + 0x004C, 0x3E94, 0x0330, 0x0D30, 0x0194, 0x3EF4, 0x0038, + 0x004C, 0x3EA0, 0x02F8, 0x0D34, 0x01C4, 0x3EE8, 0x003C, + 0x0048, 0x3EAC, 0x02C0, 0x0D3C, 0x01F4, 0x3EDC, 0x0040, + 0x0048, 0x3EB8, 0x0290, 0x0D3C, 0x0224, 0x3ED0, 0x0040, + 0x0044, 0x3EC4, 0x0258, 0x0D40, 0x0258, 0x3EC4, 0x0044 +}; + +//========================================= +// <num_taps> = 7 +// <num_phases> = 64 +// <scale_ratio> = 1.49999 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_7tap_64p_149[231] = { + 0x3F68, 0x3FEC, 0x08A8, 0x08AC, 0x3FF0, 0x3F68, 0x0000, + 0x3F70, 0x3FDC, 0x0888, 0x08CC, 0x0000, 0x3F60, 0x0000, + 0x3F74, 0x3FC8, 0x0868, 0x08F0, 0x0014, 0x3F58, 0x0000, + 0x3F7C, 0x3FB4, 0x0844, 0x0908, 0x002C, 0x3F54, 0x0004, + 0x3F84, 0x3FA4, 0x0820, 0x0924, 0x0044, 0x3F4C, 0x0004, + 0x3F88, 0x3F90, 0x0800, 0x0944, 0x005C, 0x3F44, 0x0004, + 0x3F90, 0x3F80, 0x07D8, 0x095C, 0x0074, 0x3F40, 0x0008, + 0x3F98, 0x3F70, 0x07B0, 0x097C, 0x008C, 0x3F38, 0x0008, + 0x3F9C, 0x3F60, 0x0790, 0x0994, 0x00A8, 0x3F30, 0x0008, + 0x3FA4, 0x3F54, 0x0764, 0x09B0, 0x00C4, 0x3F28, 0x0008, + 0x3FA8, 0x3F48, 0x0740, 0x09C4, 0x00DC, 0x3F24, 0x000C, + 0x3FB0, 0x3F38, 0x0718, 0x09DC, 0x00FC, 0x3F1C, 0x000C, + 0x3FB4, 0x3F2C, 0x06F0, 0x09F4, 0x0118, 0x3F18, 0x000C, + 0x3FBC, 0x3F24, 0x06C8, 0x0A08, 0x0134, 0x3F10, 0x000C, + 0x3FC0, 0x3F18, 0x06A0, 0x0A1C, 0x0154, 0x3F08, 0x0010, + 0x3FC8, 0x3F10, 0x0678, 0x0A2C, 0x0170, 0x3F04, 0x0010, + 0x3FCC, 0x3F04, 0x0650, 0x0A40, 0x0190, 0x3F00, 0x0010, + 0x3FD0, 0x3EFC, 0x0628, 0x0A54, 0x01B0, 0x3EF8, 0x0010, + 0x3FD4, 0x3EF4, 0x0600, 0x0A64, 0x01D0, 0x3EF4, 0x0010, + 0x3FDC, 0x3EEC, 0x05D8, 0x0A6C, 0x01F4, 0x3EF0, 0x0010, + 0x3FE0, 0x3EE8, 0x05B0, 0x0A7C, 0x0214, 0x3EE8, 0x0010, + 0x3FE4, 0x3EE0, 0x0588, 0x0A88, 0x0238, 0x3EE4, 0x0010, + 0x3FE8, 0x3EDC, 0x055C, 0x0A98, 0x0258, 0x3EE0, 0x0010, + 0x3FEC, 0x3ED8, 0x0534, 0x0AA0, 0x027C, 0x3EDC, 0x0010, + 0x3FF0, 0x3ED4, 0x050C, 0x0AAC, 0x02A0, 0x3ED8, 0x000C, + 0x3FF4, 0x3ED0, 0x04E4, 0x0AB4, 0x02C4, 0x3ED4, 0x000C, + 0x3FF4, 0x3ECC, 0x04C0, 0x0ABC, 0x02E8, 0x3ED0, 0x000C, + 0x3FF8, 0x3ECC, 0x0494, 0x0AC0, 0x030C, 0x3ED0, 0x000C, + 0x3FFC, 0x3EC8, 0x046C, 0x0AC8, 0x0334, 0x3ECC, 0x0008, + 0x0000, 0x3EC8, 0x0444, 0x0AC8, 0x0358, 0x3ECC, 0x0008, + 0x0000, 0x3EC8, 0x041C, 0x0ACC, 0x0380, 0x3EC8, 0x0008, + 0x0000, 0x3EC8, 0x03F4, 0x0AD0, 0x03A8, 0x3EC8, 0x0004, + 0x0004, 0x3EC8, 0x03CC, 0x0AD0, 0x03CC, 0x3EC8, 0x0004 +}; + +//========================================= +// <num_taps> = 7 +// <num_phases> = 64 +// <scale_ratio> = 1.83332 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_7tap_64p_183[231] = { + 0x3FA4, 0x01E8, 0x0674, 0x0674, 0x01E8, 0x3FA4, 0x0000, + 0x3FA4, 0x01D4, 0x0668, 0x0684, 0x01F8, 0x3FA4, 0x0000, + 0x3FA4, 0x01C4, 0x0658, 0x0690, 0x0208, 0x3FA8, 0x0000, + 0x3FA0, 0x01B4, 0x064C, 0x06A0, 0x021C, 0x3FA8, 0x3FFC, + 0x3FA0, 0x01A4, 0x063C, 0x06AC, 0x022C, 0x3FAC, 0x3FFC, + 0x3FA0, 0x0194, 0x0630, 0x06B4, 0x0240, 0x3FAC, 0x3FFC, + 0x3FA0, 0x0184, 0x0620, 0x06C4, 0x0250, 0x3FB0, 0x3FF8, + 0x3FA0, 0x0174, 
0x0614, 0x06CC, 0x0264, 0x3FB0, 0x3FF8, + 0x3FA0, 0x0164, 0x0604, 0x06D8, 0x0278, 0x3FB4, 0x3FF4, + 0x3FA0, 0x0154, 0x05F4, 0x06E4, 0x0288, 0x3FB8, 0x3FF4, + 0x3FA0, 0x0148, 0x05E4, 0x06EC, 0x029C, 0x3FBC, 0x3FF0, + 0x3FA0, 0x0138, 0x05D4, 0x06F4, 0x02B0, 0x3FC0, 0x3FF0, + 0x3FA0, 0x0128, 0x05C4, 0x0704, 0x02C4, 0x3FC0, 0x3FEC, + 0x3FA0, 0x011C, 0x05B4, 0x0708, 0x02D8, 0x3FC4, 0x3FEC, + 0x3FA4, 0x010C, 0x05A4, 0x0714, 0x02E8, 0x3FC8, 0x3FE8, + 0x3FA4, 0x0100, 0x0590, 0x0718, 0x02FC, 0x3FD0, 0x3FE8, + 0x3FA4, 0x00F0, 0x0580, 0x0724, 0x0310, 0x3FD4, 0x3FE4, + 0x3FA4, 0x00E4, 0x056C, 0x072C, 0x0324, 0x3FD8, 0x3FE4, + 0x3FA8, 0x00D8, 0x055C, 0x0730, 0x0338, 0x3FDC, 0x3FE0, + 0x3FA8, 0x00CC, 0x0548, 0x0738, 0x034C, 0x3FE4, 0x3FDC, + 0x3FA8, 0x00BC, 0x0538, 0x0740, 0x0360, 0x3FE8, 0x3FDC, + 0x3FAC, 0x00B0, 0x0528, 0x0744, 0x0374, 0x3FEC, 0x3FD8, + 0x3FAC, 0x00A4, 0x0514, 0x0748, 0x0388, 0x3FF4, 0x3FD8, + 0x3FB0, 0x0098, 0x0500, 0x074C, 0x039C, 0x3FFC, 0x3FD4, + 0x3FB0, 0x0090, 0x04EC, 0x0750, 0x03B0, 0x0000, 0x3FD4, + 0x3FB0, 0x0084, 0x04DC, 0x0758, 0x03C4, 0x0004, 0x3FD0, + 0x3FB4, 0x0078, 0x04CC, 0x0758, 0x03D8, 0x000C, 0x3FCC, + 0x3FB4, 0x006C, 0x04B8, 0x075C, 0x03EC, 0x0014, 0x3FCC, + 0x3FB8, 0x0064, 0x04A0, 0x0760, 0x0400, 0x001C, 0x3FC8, + 0x3FB8, 0x0058, 0x0490, 0x0760, 0x0414, 0x0024, 0x3FC8, + 0x3FBC, 0x0050, 0x047C, 0x0760, 0x0428, 0x002C, 0x3FC4, + 0x3FBC, 0x0048, 0x0464, 0x0764, 0x043C, 0x0034, 0x3FC4, + 0x3FC0, 0x003C, 0x0454, 0x0764, 0x0450, 0x003C, 0x3FC0 +}; + +//========================================= +// <num_taps> = 8 +// <num_phases> = 64 +// <scale_ratio> = 0.83333 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_8tap_64p_upscale[264] = { + 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x3FFC, 0x0014, 0x3FC8, 0x1000, 0x0038, 0x3FEC, 0x0004, 0x0000, + 0x3FF4, 0x0024, 0x3F94, 0x0FFC, 0x0074, 0x3FD8, 0x000C, 0x0000, + 0x3FF0, 0x0038, 0x3F60, 0x0FEC, 0x00B4, 0x3FC4, 0x0014, 0x0000, + 0x3FEC, 0x004C, 0x3F2C, 0x0FE4, 0x00F4, 0x3FAC, 0x0018, 0x0000, + 0x3FE4, 0x005C, 0x3F00, 0x0FD4, 0x0138, 0x3F94, 0x0020, 0x0000, + 0x3FE0, 0x006C, 0x3ED0, 0x0FC4, 0x017C, 0x3F7C, 0x0028, 0x0000, + 0x3FDC, 0x007C, 0x3EA8, 0x0FA4, 0x01C4, 0x3F68, 0x0030, 0x0000, + 0x3FD8, 0x0088, 0x3E80, 0x0F90, 0x020C, 0x3F50, 0x0038, 0x3FFC, + 0x3FD4, 0x0098, 0x3E58, 0x0F70, 0x0258, 0x3F38, 0x0040, 0x3FFC, + 0x3FD0, 0x00A4, 0x3E34, 0x0F54, 0x02A0, 0x3F1C, 0x004C, 0x3FFC, + 0x3FD0, 0x00B0, 0x3E14, 0x0F28, 0x02F0, 0x3F04, 0x0054, 0x3FFC, + 0x3FCC, 0x00BC, 0x3DF4, 0x0F08, 0x033C, 0x3EEC, 0x005C, 0x3FF8, + 0x3FC8, 0x00C8, 0x3DD8, 0x0EDC, 0x038C, 0x3ED4, 0x0064, 0x3FF8, + 0x3FC8, 0x00D0, 0x3DC0, 0x0EAC, 0x03E0, 0x3EBC, 0x006C, 0x3FF4, + 0x3FC4, 0x00D8, 0x3DA8, 0x0E7C, 0x0430, 0x3EA4, 0x0078, 0x3FF4, + 0x3FC4, 0x00E0, 0x3D94, 0x0E48, 0x0484, 0x3E8C, 0x0080, 0x3FF0, + 0x3FC4, 0x00E8, 0x3D80, 0x0E10, 0x04D8, 0x3E74, 0x0088, 0x3FF0, + 0x3FC4, 0x00F0, 0x3D70, 0x0DD8, 0x052C, 0x3E5C, 0x0090, 0x3FEC, + 0x3FC0, 0x00F4, 0x3D60, 0x0DA0, 0x0584, 0x3E44, 0x0098, 0x3FEC, + 0x3FC0, 0x00F8, 0x3D54, 0x0D68, 0x05D8, 0x3E2C, 0x00A0, 0x3FE8, + 0x3FC0, 0x00FC, 0x3D48, 0x0D20, 0x0630, 0x3E18, 0x00AC, 0x3FE8, + 0x3FC0, 0x0100, 0x3D40, 0x0CE0, 0x0688, 0x3E00, 0x00B4, 0x3FE4, + 0x3FC4, 0x0100, 0x3D3C, 0x0C98, 0x06DC, 0x3DEC, 0x00BC, 0x3FE4, + 0x3FC4, 0x0100, 0x3D38, 0x0C58, 0x0734, 0x3DD8, 0x00C0, 0x3FE0, + 0x3FC4, 0x0104, 0x3D38, 0x0C0C, 0x078C, 0x3DC4, 0x00C8, 0x3FDC, + 0x3FC4, 
0x0100, 0x3D38, 0x0BC4, 0x07E4, 0x3DB0, 0x00D0, 0x3FDC, + 0x3FC4, 0x0100, 0x3D38, 0x0B78, 0x083C, 0x3DA0, 0x00D8, 0x3FD8, + 0x3FC8, 0x0100, 0x3D3C, 0x0B28, 0x0890, 0x3D90, 0x00DC, 0x3FD8, + 0x3FC8, 0x00FC, 0x3D40, 0x0ADC, 0x08E8, 0x3D80, 0x00E4, 0x3FD4, + 0x3FCC, 0x00FC, 0x3D48, 0x0A84, 0x093C, 0x3D74, 0x00E8, 0x3FD4, + 0x3FCC, 0x00F8, 0x3D50, 0x0A38, 0x0990, 0x3D64, 0x00F0, 0x3FD0, + 0x3FD0, 0x00F4, 0x3D58, 0x09E0, 0x09E4, 0x3D5C, 0x00F4, 0x3FD0 +}; + +//========================================= +// <num_taps> = 8 +// <num_phases> = 64 +// <scale_ratio> = 1.16666 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_8tap_64p_116[264] = { + 0x0080, 0x3E90, 0x0268, 0x0D14, 0x0264, 0x3E90, 0x0080, 0x0000, + 0x007C, 0x3E9C, 0x0238, 0x0D14, 0x0298, 0x3E84, 0x0080, 0x0000, + 0x0078, 0x3EAC, 0x0200, 0x0D10, 0x02D0, 0x3E78, 0x0084, 0x0000, + 0x0078, 0x3EB8, 0x01D0, 0x0D0C, 0x0304, 0x3E6C, 0x0084, 0x0000, + 0x0074, 0x3EC8, 0x01A0, 0x0D00, 0x033C, 0x3E60, 0x0088, 0x0000, + 0x0070, 0x3ED4, 0x0170, 0x0D00, 0x0374, 0x3E54, 0x0088, 0x3FFC, + 0x006C, 0x3EE4, 0x0140, 0x0CF8, 0x03AC, 0x3E48, 0x0088, 0x3FFC, + 0x006C, 0x3EF0, 0x0114, 0x0CE8, 0x03E4, 0x3E3C, 0x008C, 0x3FFC, + 0x0068, 0x3F00, 0x00E8, 0x0CD8, 0x041C, 0x3E34, 0x008C, 0x3FFC, + 0x0064, 0x3F10, 0x00BC, 0x0CCC, 0x0454, 0x3E28, 0x008C, 0x3FFC, + 0x0060, 0x3F1C, 0x0090, 0x0CBC, 0x0490, 0x3E20, 0x008C, 0x3FFC, + 0x005C, 0x3F2C, 0x0068, 0x0CA4, 0x04CC, 0x3E18, 0x008C, 0x3FFC, + 0x0058, 0x3F38, 0x0040, 0x0C94, 0x0504, 0x3E10, 0x008C, 0x3FFC, + 0x0054, 0x3F48, 0x001C, 0x0C7C, 0x0540, 0x3E08, 0x0088, 0x3FFC, + 0x0050, 0x3F54, 0x3FF8, 0x0C60, 0x057C, 0x3E04, 0x0088, 0x3FFC, + 0x004C, 0x3F64, 0x3FD4, 0x0C44, 0x05B8, 0x3DFC, 0x0088, 0x3FFC, + 0x0048, 0x3F70, 0x3FB4, 0x0C28, 0x05F4, 0x3DF8, 0x0084, 0x3FFC, + 0x0044, 0x3F80, 0x3F90, 0x0C0C, 0x0630, 0x3DF4, 0x0080, 0x3FFC, + 0x0040, 0x3F8C, 0x3F70, 0x0BE8, 0x066C, 0x3DF4, 0x0080, 0x3FFC, + 0x003C, 0x3F9C, 0x3F50, 0x0BC8, 0x06A8, 0x3DF0, 0x007C, 0x3FFC, + 0x0038, 0x3FA8, 0x3F34, 0x0BA0, 0x06E4, 0x3DF0, 0x0078, 0x0000, + 0x0034, 0x3FB4, 0x3F18, 0x0B80, 0x071C, 0x3DF0, 0x0074, 0x0000, + 0x0030, 0x3FC0, 0x3EFC, 0x0B5C, 0x0758, 0x3DF0, 0x0070, 0x0000, + 0x002C, 0x3FCC, 0x3EE4, 0x0B34, 0x0794, 0x3DF4, 0x0068, 0x0000, + 0x002C, 0x3FDC, 0x3ECC, 0x0B08, 0x07CC, 0x3DF4, 0x0064, 0x0000, + 0x0028, 0x3FE4, 0x3EB4, 0x0AE0, 0x0808, 0x3DF8, 0x0060, 0x0000, + 0x0024, 0x3FF0, 0x3EA0, 0x0AB0, 0x0840, 0x3E00, 0x0058, 0x0004, + 0x0020, 0x3FFC, 0x3E90, 0x0A84, 0x0878, 0x3E04, 0x0050, 0x0004, + 0x001C, 0x0004, 0x3E7C, 0x0A54, 0x08B0, 0x3E0C, 0x004C, 0x0008, + 0x0018, 0x000C, 0x3E68, 0x0A28, 0x08E8, 0x3E18, 0x0044, 0x0008, + 0x0018, 0x0018, 0x3E54, 0x09F4, 0x0920, 0x3E20, 0x003C, 0x000C, + 0x0014, 0x0020, 0x3E48, 0x09C0, 0x0954, 0x3E2C, 0x0034, 0x0010, + 0x0010, 0x002C, 0x3E3C, 0x098C, 0x0988, 0x3E38, 0x002C, 0x0010 +}; + +//========================================= +// <num_taps> = 8 +// <num_phases> = 64 +// <scale_ratio> = 1.49999 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_8tap_64p_149[264] = { + 0x0008, 0x3E8C, 0x03F8, 0x0AE8, 0x03F8, 0x3E8C, 0x0008, 0x0000, + 0x000C, 0x3E8C, 0x03D0, 0x0AE8, 0x0420, 0x3E90, 0x0000, 0x0000, + 0x000C, 0x3E8C, 0x03AC, 0x0AE8, 0x0444, 0x3E90, 0x0000, 0x0000, + 0x0010, 0x3E90, 0x0384, 0x0AE0, 0x046C, 0x3E94, 0x3FFC, 0x0000, + 
0x0014, 0x3E90, 0x035C, 0x0ADC, 0x0494, 0x3E94, 0x3FF8, 0x0004, + 0x0018, 0x3E90, 0x0334, 0x0AD8, 0x04BC, 0x3E98, 0x3FF4, 0x0004, + 0x001C, 0x3E94, 0x0310, 0x0AD0, 0x04E4, 0x3E9C, 0x3FEC, 0x0004, + 0x0020, 0x3E98, 0x02E8, 0x0AC4, 0x050C, 0x3EA0, 0x3FE8, 0x0008, + 0x0020, 0x3E98, 0x02C4, 0x0AC0, 0x0534, 0x3EA4, 0x3FE4, 0x0008, + 0x0024, 0x3E9C, 0x02A0, 0x0AB4, 0x055C, 0x3EAC, 0x3FDC, 0x0008, + 0x0024, 0x3EA0, 0x027C, 0x0AA8, 0x0584, 0x3EB0, 0x3FD8, 0x000C, + 0x0028, 0x3EA4, 0x0258, 0x0A9C, 0x05AC, 0x3EB8, 0x3FD0, 0x000C, + 0x0028, 0x3EA8, 0x0234, 0x0A90, 0x05D4, 0x3EC0, 0x3FC8, 0x0010, + 0x002C, 0x3EAC, 0x0210, 0x0A80, 0x05FC, 0x3EC8, 0x3FC4, 0x0010, + 0x002C, 0x3EB4, 0x01F0, 0x0A70, 0x0624, 0x3ED0, 0x3FBC, 0x0010, + 0x002C, 0x3EB8, 0x01CC, 0x0A60, 0x064C, 0x3EDC, 0x3FB4, 0x0014, + 0x0030, 0x3EBC, 0x01A8, 0x0A50, 0x0674, 0x3EE4, 0x3FB0, 0x0014, + 0x0030, 0x3EC4, 0x0188, 0x0A38, 0x069C, 0x3EF0, 0x3FA8, 0x0018, + 0x0030, 0x3ECC, 0x0168, 0x0A28, 0x06C0, 0x3EFC, 0x3FA0, 0x0018, + 0x0030, 0x3ED0, 0x0148, 0x0A14, 0x06E8, 0x3F08, 0x3F98, 0x001C, + 0x0030, 0x3ED8, 0x012C, 0x0A00, 0x070C, 0x3F14, 0x3F90, 0x001C, + 0x0034, 0x3EE0, 0x0108, 0x09E4, 0x0734, 0x3F24, 0x3F8C, 0x001C, + 0x0034, 0x3EE4, 0x00EC, 0x09CC, 0x0758, 0x3F34, 0x3F84, 0x0020, + 0x0034, 0x3EEC, 0x00D0, 0x09B8, 0x077C, 0x3F40, 0x3F7C, 0x0020, + 0x0034, 0x3EF4, 0x00B4, 0x0998, 0x07A4, 0x3F50, 0x3F74, 0x0024, + 0x0030, 0x3EFC, 0x0098, 0x0980, 0x07C8, 0x3F64, 0x3F6C, 0x0024, + 0x0030, 0x3F04, 0x0080, 0x0968, 0x07E8, 0x3F74, 0x3F64, 0x0024, + 0x0030, 0x3F0C, 0x0060, 0x094C, 0x080C, 0x3F88, 0x3F5C, 0x0028, + 0x0030, 0x3F14, 0x0048, 0x0930, 0x0830, 0x3F98, 0x3F54, 0x0028, + 0x0030, 0x3F1C, 0x0030, 0x0914, 0x0850, 0x3FAC, 0x3F4C, 0x0028, + 0x0030, 0x3F24, 0x0018, 0x08F0, 0x0874, 0x3FC0, 0x3F44, 0x002C, + 0x002C, 0x3F2C, 0x0000, 0x08D4, 0x0894, 0x3FD8, 0x3F3C, 0x002C, + 0x002C, 0x3F34, 0x3FEC, 0x08B4, 0x08B4, 0x3FEC, 0x3F34, 0x002C +}; + +//========================================= +// <num_taps> = 8 +// <num_phases> = 64 +// <scale_ratio> = 1.83332 (input/output) +// <sharpness> = 0 +// <CoefType> = ModifiedLanczos +// <CoefQuant> = 1.10 +// <CoefOut> = 1.12 +//========================================= +static const uint16_t filter_8tap_64p_183[264] = { + 0x3F88, 0x0048, 0x047C, 0x0768, 0x047C, 0x0048, 0x3F88, 0x0000, + 0x3F88, 0x003C, 0x0468, 0x076C, 0x0490, 0x0054, 0x3F84, 0x0000, + 0x3F8C, 0x0034, 0x0454, 0x0768, 0x04A4, 0x005C, 0x3F84, 0x0000, + 0x3F8C, 0x0028, 0x0444, 0x076C, 0x04B4, 0x0068, 0x3F80, 0x0000, + 0x3F90, 0x0020, 0x042C, 0x0768, 0x04C8, 0x0074, 0x3F80, 0x0000, + 0x3F90, 0x0018, 0x041C, 0x0764, 0x04DC, 0x0080, 0x3F7C, 0x0000, + 0x3F94, 0x0010, 0x0408, 0x075C, 0x04F0, 0x008C, 0x3F7C, 0x0000, + 0x3F94, 0x0004, 0x03F8, 0x0760, 0x0500, 0x0098, 0x3F7C, 0x3FFC, + 0x3F98, 0x0000, 0x03E0, 0x075C, 0x0514, 0x00A4, 0x3F78, 0x3FFC, + 0x3F9C, 0x3FF8, 0x03CC, 0x0754, 0x0528, 0x00B0, 0x3F78, 0x3FFC, + 0x3F9C, 0x3FF0, 0x03B8, 0x0754, 0x0538, 0x00BC, 0x3F78, 0x3FFC, + 0x3FA0, 0x3FE8, 0x03A4, 0x0750, 0x054C, 0x00CC, 0x3F74, 0x3FF8, + 0x3FA4, 0x3FE0, 0x0390, 0x074C, 0x055C, 0x00D8, 0x3F74, 0x3FF8, + 0x3FA4, 0x3FDC, 0x037C, 0x0744, 0x0570, 0x00E4, 0x3F74, 0x3FF8, + 0x3FA8, 0x3FD4, 0x0368, 0x0740, 0x0580, 0x00F4, 0x3F74, 0x3FF4, + 0x3FA8, 0x3FCC, 0x0354, 0x073C, 0x0590, 0x0104, 0x3F74, 0x3FF4, + 0x3FAC, 0x3FC8, 0x0340, 0x0730, 0x05A4, 0x0110, 0x3F74, 0x3FF4, + 0x3FB0, 0x3FC0, 0x0330, 0x0728, 0x05B4, 0x0120, 0x3F74, 0x3FF0, + 0x3FB0, 0x3FBC, 0x031C, 0x0724, 0x05C4, 0x0130, 0x3F70, 0x3FF0, + 0x3FB4, 0x3FB4, 0x0308, 0x0720, 0x05D4, 0x013C, 0x3F70, 
0x3FF0, + 0x3FB8, 0x3FB0, 0x02F4, 0x0714, 0x05E4, 0x014C, 0x3F74, 0x3FEC, + 0x3FB8, 0x3FAC, 0x02E0, 0x0708, 0x05F8, 0x015C, 0x3F74, 0x3FEC, + 0x3FBC, 0x3FA8, 0x02CC, 0x0704, 0x0604, 0x016C, 0x3F74, 0x3FE8, + 0x3FC0, 0x3FA0, 0x02BC, 0x06F8, 0x0614, 0x017C, 0x3F74, 0x3FE8, + 0x3FC0, 0x3F9C, 0x02A8, 0x06F4, 0x0624, 0x018C, 0x3F74, 0x3FE4, + 0x3FC4, 0x3F98, 0x0294, 0x06E8, 0x0634, 0x019C, 0x3F74, 0x3FE4, + 0x3FC8, 0x3F94, 0x0284, 0x06D8, 0x0644, 0x01AC, 0x3F78, 0x3FE0, + 0x3FC8, 0x3F90, 0x0270, 0x06D4, 0x0650, 0x01BC, 0x3F78, 0x3FE0, + 0x3FCC, 0x3F8C, 0x025C, 0x06C8, 0x0660, 0x01D0, 0x3F78, 0x3FDC, + 0x3FCC, 0x3F8C, 0x024C, 0x06B8, 0x066C, 0x01E0, 0x3F7C, 0x3FDC, + 0x3FD0, 0x3F88, 0x0238, 0x06B0, 0x067C, 0x01F0, 0x3F7C, 0x3FD8, + 0x3FD4, 0x3F84, 0x0228, 0x069C, 0x0688, 0x0204, 0x3F80, 0x3FD8, + 0x3FD4, 0x3F84, 0x0214, 0x0694, 0x0694, 0x0214, 0x3F84, 0x3FD4 +}; + +const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_one.value) + return filter_3tap_16p_upscale; + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + return filter_3tap_16p_116; + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + return filter_3tap_16p_149; + else + return filter_3tap_16p_183; +} + +const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_one.value) + return filter_3tap_64p_upscale; + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + return filter_3tap_64p_116; + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + return filter_3tap_64p_149; + else + return filter_3tap_64p_183; +} + +const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_one.value) + return filter_4tap_16p_upscale; + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + return filter_4tap_16p_116; + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + return filter_4tap_16p_149; + else + return filter_4tap_16p_183; +} + +const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_one.value) + return filter_4tap_64p_upscale; + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + return filter_4tap_64p_116; + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + return filter_4tap_64p_149; + else + return filter_4tap_64p_183; +} + +const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_one.value) + return filter_5tap_64p_upscale; + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + return filter_5tap_64p_116; + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + return filter_5tap_64p_149; + else + return filter_5tap_64p_183; +} + +const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_one.value) + return filter_6tap_64p_upscale; + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + return filter_6tap_64p_116; + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + return filter_6tap_64p_149; + else + return filter_6tap_64p_183; +} + +const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_one.value) + return filter_7tap_64p_upscale; + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + return filter_7tap_64p_116; + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + return filter_7tap_64p_149; + else + return filter_7tap_64p_183; +} + +const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio) +{ + if (ratio.value < dc_fixpt_one.value) + 
return filter_8tap_64p_upscale; + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + return filter_8tap_64p_116; + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + return filter_8tap_64p_149; + else + return filter_8tap_64p_183; +} + +const uint16_t *spl_get_filter_2tap_16p(void) +{ + return filter_2tap_16p; +} + +const uint16_t *spl_get_filter_2tap_64p(void) +{ + return filter_2tap_64p; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h new file mode 100644 index 000000000000..6d96aca53b24 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DC_SPL_SCL_FILTERS_H__ +#define __DC_SPL_SCL_FILTERS_H__ + +#include "dc_spl_types.h" + +const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_2tap_16p(void); +const uint16_t *spl_get_filter_2tap_64p(void); +const uint16_t *spl_get_filter_3tap_16p_upscale(void); +const uint16_t *spl_get_filter_3tap_16p_116(void); +const uint16_t *spl_get_filter_3tap_16p_149(void); +const uint16_t *spl_get_filter_3tap_16p_183(void); + +const uint16_t *spl_get_filter_4tap_16p_upscale(void); +const uint16_t *spl_get_filter_4tap_16p_116(void); +const uint16_t *spl_get_filter_4tap_16p_149(void); +const uint16_t *spl_get_filter_4tap_16p_183(void); + +const uint16_t *spl_get_filter_3tap_64p_upscale(void); +const uint16_t *spl_get_filter_3tap_64p_116(void); +const uint16_t *spl_get_filter_3tap_64p_149(void); +const uint16_t *spl_get_filter_3tap_64p_183(void); + +const uint16_t *spl_get_filter_4tap_64p_upscale(void); +const uint16_t *spl_get_filter_4tap_64p_116(void); +const uint16_t *spl_get_filter_4tap_64p_149(void); +const uint16_t *spl_get_filter_4tap_64p_183(void); + +const uint16_t *spl_get_filter_5tap_64p_upscale(void); +const uint16_t *spl_get_filter_5tap_64p_116(void); +const uint16_t *spl_get_filter_5tap_64p_149(void); +const uint16_t *spl_get_filter_5tap_64p_183(void); + +const uint16_t *spl_get_filter_6tap_64p_upscale(void); +const uint16_t *spl_get_filter_6tap_64p_116(void); +const uint16_t *spl_get_filter_6tap_64p_149(void); +const uint16_t *spl_get_filter_6tap_64p_183(void); + +const uint16_t *spl_get_filter_7tap_64p_upscale(void); +const uint16_t *spl_get_filter_7tap_64p_116(void); +const uint16_t *spl_get_filter_7tap_64p_149(void); +const uint16_t *spl_get_filter_7tap_64p_183(void); + +const uint16_t *spl_get_filter_8tap_64p_upscale(void); +const uint16_t *spl_get_filter_8tap_64p_116(void); +const uint16_t *spl_get_filter_8tap_64p_149(void); +const uint16_t *spl_get_filter_8tap_64p_183(void); +#endif /* __DC_SPL_SCL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h new file mode 100644 index 000000000000..c5ef15f16c68 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -0,0 +1,458 @@ +// SPDX-License-Identifier: MIT +// +// 
Copyright 2024 Advanced Micro Devices, Inc. + +#include "os_types.h" +#include "dc_hw_types.h" +#ifndef ASSERT +#define ASSERT(_bool) (void *)0 +#endif +#include "include/fixed31_32.h" // fixed31_32 and related functions +#ifndef __DC_SPL_TYPES_H__ +#define __DC_SPL_TYPES_H__ + +enum lb_memory_config { + /* Enable all 3 pieces of memory */ + LB_MEMORY_CONFIG_0 = 0, + + /* Enable only the first piece of memory */ + LB_MEMORY_CONFIG_1 = 1, + + /* Enable only the second piece of memory */ + LB_MEMORY_CONFIG_2 = 2, + + /* Only applicable in 4:2:0 mode, enable all 3 pieces of memory and the + * last piece of chroma memory used for the luma storage + */ + LB_MEMORY_CONFIG_3 = 3 +}; + +struct spl_size { + uint32_t width; + uint32_t height; +}; +struct spl_rect { + int x; + int y; + int width; + int height; +}; + +struct spl_ratios { + struct fixed31_32 horz; + struct fixed31_32 vert; + struct fixed31_32 horz_c; + struct fixed31_32 vert_c; +}; +struct spl_inits { + struct fixed31_32 h; + struct fixed31_32 h_c; + struct fixed31_32 v; + struct fixed31_32 v_c; +}; + +struct spl_taps { + uint32_t v_taps; + uint32_t h_taps; + uint32_t v_taps_c; + uint32_t h_taps_c; + bool integer_scaling; +}; +enum spl_view_3d { + SPL_VIEW_3D_NONE = 0, + SPL_VIEW_3D_FRAME_SEQUENTIAL, + SPL_VIEW_3D_SIDE_BY_SIDE, + SPL_VIEW_3D_TOP_AND_BOTTOM, + SPL_VIEW_3D_COUNT, + SPL_VIEW_3D_FIRST = SPL_VIEW_3D_FRAME_SEQUENTIAL +}; +/* Pixel format */ +enum spl_pixel_format { + /*graph*/ + SPL_PIXEL_FORMAT_UNINITIALIZED, + SPL_PIXEL_FORMAT_INDEX8, + SPL_PIXEL_FORMAT_RGB565, + SPL_PIXEL_FORMAT_ARGB8888, + SPL_PIXEL_FORMAT_ARGB2101010, + SPL_PIXEL_FORMAT_ARGB2101010_XRBIAS, + SPL_PIXEL_FORMAT_FP16, + /*video*/ + SPL_PIXEL_FORMAT_420BPP8, + SPL_PIXEL_FORMAT_420BPP10, + /*end of pixel format definition*/ + SPL_PIXEL_FORMAT_INVALID, + SPL_PIXEL_FORMAT_GRPH_BEGIN = SPL_PIXEL_FORMAT_INDEX8, + SPL_PIXEL_FORMAT_GRPH_END = SPL_PIXEL_FORMAT_FP16, + SPL_PIXEL_FORMAT_VIDEO_BEGIN = SPL_PIXEL_FORMAT_420BPP8, + SPL_PIXEL_FORMAT_VIDEO_END = SPL_PIXEL_FORMAT_420BPP10, + SPL_PIXEL_FORMAT_UNKNOWN +}; + +/* Rotation angle */ +enum spl_rotation_angle { + SPL_ROTATION_ANGLE_0 = 0, + SPL_ROTATION_ANGLE_90, + SPL_ROTATION_ANGLE_180, + SPL_ROTATION_ANGLE_270, + SPL_ROTATION_ANGLE_COUNT +}; +enum spl_color_space { + SPL_COLOR_SPACE_UNKNOWN, + SPL_COLOR_SPACE_SRGB, + SPL_COLOR_SPACE_XR_RGB, + SPL_COLOR_SPACE_SRGB_LIMITED, + SPL_COLOR_SPACE_MSREF_SCRGB, + SPL_COLOR_SPACE_YCBCR601, + SPL_COLOR_SPACE_YCBCR709, + SPL_COLOR_SPACE_XV_YCC_709, + SPL_COLOR_SPACE_XV_YCC_601, + SPL_COLOR_SPACE_YCBCR601_LIMITED, + SPL_COLOR_SPACE_YCBCR709_LIMITED, + SPL_COLOR_SPACE_2020_RGB_FULLRANGE, + SPL_COLOR_SPACE_2020_RGB_LIMITEDRANGE, + SPL_COLOR_SPACE_2020_YCBCR, + SPL_COLOR_SPACE_ADOBERGB, + SPL_COLOR_SPACE_DCIP3, + SPL_COLOR_SPACE_DISPLAYNATIVE, + SPL_COLOR_SPACE_DOLBYVISION, + SPL_COLOR_SPACE_APPCTRL, + SPL_COLOR_SPACE_CUSTOMPOINTS, + SPL_COLOR_SPACE_YCBCR709_BLACK, +}; + +// Scratch space for calculating scaler params +struct spl_scaler_data { + int h_active; + int v_active; + struct spl_taps taps; + struct spl_rect viewport; + struct spl_rect viewport_c; + struct spl_rect recout; + struct spl_ratios ratios; + struct spl_inits inits; +}; + +/*==============================================================*/ +/* Below structs are defined to hold hw register data */ + +// SPL output is used to set below registers + +// MPC_SIZE - set based on scl_data h_active and v_active +struct mpc_size { + uint32_t width; + uint32_t height; +}; +// SCL_MODE - set based on scl_data.ratios and 
always_scale +enum scl_mode { + SCL_MODE_SCALING_444_BYPASS = 0, + SCL_MODE_SCALING_444_RGB_ENABLE = 1, + SCL_MODE_SCALING_444_YCBCR_ENABLE = 2, + SCL_MODE_SCALING_420_YCBCR_ENABLE = 3, + SCL_MODE_SCALING_420_LUMA_BYPASS = 4, + SCL_MODE_SCALING_420_CHROMA_BYPASS = 5, + SCL_MODE_DSCL_BYPASS = 6 +}; +// SCL_BLACK_COLOR - set based on scl_data.format +struct scl_black_color { + uint32_t offset_rgb_y; + uint32_t offset_rgb_cbcr; +}; +// RATIO - set based on scl_data.ratios +struct ratio { + uint32_t h_scale_ratio; + uint32_t v_scale_ratio; + uint32_t h_scale_ratio_c; + uint32_t v_scale_ratio_c; +}; + +// INIT - set based on scl_data.init +struct init { + // SCL_HORZ_FILTER_INIT + uint32_t h_filter_init_frac; // SCL_H_INIT_FRAC + uint32_t h_filter_init_int; // SCL_H_INIT_INT + // SCL_HORZ_FILTER_INIT_C + uint32_t h_filter_init_frac_c; // SCL_H_INIT_FRAC_C + uint32_t h_filter_init_int_c; // SCL_H_INIT_INT_C + // SCL_VERT_FILTER_INIT + uint32_t v_filter_init_frac; // SCL_V_INIT_FRAC + uint32_t v_filter_init_int; // SCL_V_INIT_INT + // SCL_VERT_FILTER_INIT_C + uint32_t v_filter_init_frac_c; // SCL_V_INIT_FRAC_C + uint32_t v_filter_init_int_c; // SCL_V_INIT_INT_C + // SCL_VERT_FILTER_INIT_BOT + uint32_t v_filter_init_bot_frac; // SCL_V_INIT_FRAC_BOT + uint32_t v_filter_init_bot_int; // SCL_V_INIT_INT_BOT + // SCL_VERT_FILTER_INIT_BOT_C + uint32_t v_filter_init_bot_frac_c; // SCL_V_INIT_FRAC_BOT_C + uint32_t v_filter_init_bot_int_c; // SCL_V_INIT_INT_BOT_C +}; + +// FILTER - calculated based on scl_data ratios and taps + +// iSHARP +struct isharp_noise_det { + uint32_t enable; // ISHARP_NOISEDET_EN + uint32_t mode; // ISHARP_NOISEDET_MODE + uint32_t uthreshold; // ISHARP_NOISEDET_UTHRE + uint32_t dthreshold; // ISHARP_NOISEDET_DTHRE + uint32_t pwl_start_in; // ISHARP_NOISEDET_PWL_START_IN + uint32_t pwl_end_in; // ISHARP_NOISEDET_PWL_END_IN + uint32_t pwl_slope; // ISHARP_NOISEDET_PWL_SLOPE +}; +struct isharp_lba { + uint32_t mode; // ISHARP_LBA_MODE + uint32_t in_seg[6]; + uint32_t base_seg[6]; + uint32_t slope_seg[6]; +}; +struct isharp_fmt { + uint32_t mode; // ISHARP_FMT_MODE + uint32_t norm; // ISHARP_FMT_NORM +}; +struct isharp_nldelta_sclip { + uint32_t enable_p; // ISHARP_NLDELTA_SCLIP_EN_P + uint32_t pivot_p; // ISHARP_NLDELTA_SCLIP_PIVOT_P + uint32_t slope_p; // ISHARP_NLDELTA_SCLIP_SLOPE_P + uint32_t enable_n; // ISHARP_NLDELTA_SCLIP_EN_N + uint32_t pivot_n; // ISHARP_NLDELTA_SCLIP_PIVOT_N + uint32_t slope_n; // ISHARP_NLDELTA_SCLIP_SLOPE_N +}; +enum isharp_en { + ISHARP_DISABLE, + ISHARP_ENABLE +}; +// Below struct holds values that can be directly used to program +// hardware registers. 
No conversion/clamping is required +struct dscl_prog_data { + struct spl_rect recout; // RECOUT - set based on scl_data.recout + struct mpc_size mpc_size; + uint32_t dscl_mode; + struct scl_black_color scl_black_color; + struct ratio ratios; + struct init init; + struct spl_taps taps; // TAPS - set based on scl_data.taps + struct spl_rect viewport; + struct spl_rect viewport_c; + // raw filter + const uint16_t *filter_h; + const uint16_t *filter_v; + const uint16_t *filter_h_c; + const uint16_t *filter_v_c; + // EASF registers + uint32_t easf_matrix_mode; + uint32_t easf_ltonl_en; + uint32_t easf_v_en; + uint32_t easf_v_sharp_factor; + uint32_t easf_v_ring; + uint32_t easf_v_bf1_en; + uint32_t easf_v_bf2_mode; + uint32_t easf_v_bf3_mode; + uint32_t easf_v_bf2_flat1_gain; + uint32_t easf_v_bf2_flat2_gain; + uint32_t easf_v_bf2_roc_gain; + uint32_t easf_v_ringest_3tap_dntilt_uptilt; + uint32_t easf_v_ringest_3tap_uptilt_max; + uint32_t easf_v_ringest_3tap_dntilt_slope; + uint32_t easf_v_ringest_3tap_uptilt1_slope; + uint32_t easf_v_ringest_3tap_uptilt2_slope; + uint32_t easf_v_ringest_3tap_uptilt2_offset; + uint32_t easf_v_ringest_eventap_reduceg1; + uint32_t easf_v_ringest_eventap_reduceg2; + uint32_t easf_v_ringest_eventap_gain1; + uint32_t easf_v_ringest_eventap_gain2; + uint32_t easf_v_bf_maxa; + uint32_t easf_v_bf_maxb; + uint32_t easf_v_bf_mina; + uint32_t easf_v_bf_minb; + uint32_t easf_v_bf1_pwl_in_seg0; + uint32_t easf_v_bf1_pwl_base_seg0; + uint32_t easf_v_bf1_pwl_slope_seg0; + uint32_t easf_v_bf1_pwl_in_seg1; + uint32_t easf_v_bf1_pwl_base_seg1; + uint32_t easf_v_bf1_pwl_slope_seg1; + uint32_t easf_v_bf1_pwl_in_seg2; + uint32_t easf_v_bf1_pwl_base_seg2; + uint32_t easf_v_bf1_pwl_slope_seg2; + uint32_t easf_v_bf1_pwl_in_seg3; + uint32_t easf_v_bf1_pwl_base_seg3; + uint32_t easf_v_bf1_pwl_slope_seg3; + uint32_t easf_v_bf1_pwl_in_seg4; + uint32_t easf_v_bf1_pwl_base_seg4; + uint32_t easf_v_bf1_pwl_slope_seg4; + uint32_t easf_v_bf1_pwl_in_seg5; + uint32_t easf_v_bf1_pwl_base_seg5; + uint32_t easf_v_bf1_pwl_slope_seg5; + uint32_t easf_v_bf1_pwl_in_seg6; + uint32_t easf_v_bf1_pwl_base_seg6; + uint32_t easf_v_bf1_pwl_slope_seg6; + uint32_t easf_v_bf1_pwl_in_seg7; + uint32_t easf_v_bf1_pwl_base_seg7; + uint32_t easf_v_bf3_pwl_in_set0; + uint32_t easf_v_bf3_pwl_base_set0; + uint32_t easf_v_bf3_pwl_slope_set0; + uint32_t easf_v_bf3_pwl_in_set1; + uint32_t easf_v_bf3_pwl_base_set1; + uint32_t easf_v_bf3_pwl_slope_set1; + uint32_t easf_v_bf3_pwl_in_set2; + uint32_t easf_v_bf3_pwl_base_set2; + uint32_t easf_v_bf3_pwl_slope_set2; + uint32_t easf_v_bf3_pwl_in_set3; + uint32_t easf_v_bf3_pwl_base_set3; + uint32_t easf_v_bf3_pwl_slope_set3; + uint32_t easf_v_bf3_pwl_in_set4; + uint32_t easf_v_bf3_pwl_base_set4; + uint32_t easf_v_bf3_pwl_slope_set4; + uint32_t easf_v_bf3_pwl_in_set5; + uint32_t easf_v_bf3_pwl_base_set5; + uint32_t easf_h_en; + uint32_t easf_h_sharp_factor; + uint32_t easf_h_ring; + uint32_t easf_h_bf1_en; + uint32_t easf_h_bf2_mode; + uint32_t easf_h_bf3_mode; + uint32_t easf_h_bf2_flat1_gain; + uint32_t easf_h_bf2_flat2_gain; + uint32_t easf_h_bf2_roc_gain; + uint32_t easf_h_ringest_eventap_reduceg1; + uint32_t easf_h_ringest_eventap_reduceg2; + uint32_t easf_h_ringest_eventap_gain1; + uint32_t easf_h_ringest_eventap_gain2; + uint32_t easf_h_bf_maxa; + uint32_t easf_h_bf_maxb; + uint32_t easf_h_bf_mina; + uint32_t easf_h_bf_minb; + uint32_t easf_h_bf1_pwl_in_seg0; + uint32_t easf_h_bf1_pwl_base_seg0; + uint32_t easf_h_bf1_pwl_slope_seg0; + uint32_t easf_h_bf1_pwl_in_seg1; + 
uint32_t easf_h_bf1_pwl_base_seg1; + uint32_t easf_h_bf1_pwl_slope_seg1; + uint32_t easf_h_bf1_pwl_in_seg2; + uint32_t easf_h_bf1_pwl_base_seg2; + uint32_t easf_h_bf1_pwl_slope_seg2; + uint32_t easf_h_bf1_pwl_in_seg3; + uint32_t easf_h_bf1_pwl_base_seg3; + uint32_t easf_h_bf1_pwl_slope_seg3; + uint32_t easf_h_bf1_pwl_in_seg4; + uint32_t easf_h_bf1_pwl_base_seg4; + uint32_t easf_h_bf1_pwl_slope_seg4; + uint32_t easf_h_bf1_pwl_in_seg5; + uint32_t easf_h_bf1_pwl_base_seg5; + uint32_t easf_h_bf1_pwl_slope_seg5; + uint32_t easf_h_bf1_pwl_in_seg6; + uint32_t easf_h_bf1_pwl_base_seg6; + uint32_t easf_h_bf1_pwl_slope_seg6; + uint32_t easf_h_bf1_pwl_in_seg7; + uint32_t easf_h_bf1_pwl_base_seg7; + uint32_t easf_h_bf3_pwl_in_set0; + uint32_t easf_h_bf3_pwl_base_set0; + uint32_t easf_h_bf3_pwl_slope_set0; + uint32_t easf_h_bf3_pwl_in_set1; + uint32_t easf_h_bf3_pwl_base_set1; + uint32_t easf_h_bf3_pwl_slope_set1; + uint32_t easf_h_bf3_pwl_in_set2; + uint32_t easf_h_bf3_pwl_base_set2; + uint32_t easf_h_bf3_pwl_slope_set2; + uint32_t easf_h_bf3_pwl_in_set3; + uint32_t easf_h_bf3_pwl_base_set3; + uint32_t easf_h_bf3_pwl_slope_set3; + uint32_t easf_h_bf3_pwl_in_set4; + uint32_t easf_h_bf3_pwl_base_set4; + uint32_t easf_h_bf3_pwl_slope_set4; + uint32_t easf_h_bf3_pwl_in_set5; + uint32_t easf_h_bf3_pwl_base_set5; + uint32_t easf_matrix_c0; + uint32_t easf_matrix_c1; + uint32_t easf_matrix_c2; + uint32_t easf_matrix_c3; + // iSharp + uint32_t isharp_en; // ISHARP_EN + struct isharp_noise_det isharp_noise_det; // ISHARP_NOISEDET + uint32_t isharp_nl_en; // ISHARP_NL_EN ? TODO:check this + struct isharp_lba isharp_lba; // ISHARP_LBA + struct isharp_fmt isharp_fmt; // ISHARP_FMT + const uint32_t *isharp_delta; + struct isharp_nldelta_sclip isharp_nldelta_sclip; // ISHARP_NLDELTA_SCLIP +}; + +/* SPL input and output definitions */ +// SPL outputs struct +struct spl_out { + // Pack all SPL outputs in scl_data + struct spl_scaler_data scl_data; + // Pack all output need to program hw registers + struct dscl_prog_data *dscl_prog_data; +}; + +// end of SPL outputs + +// SPL inputs + +// Basic input information +struct basic_in { + enum spl_pixel_format format; // Pixel Format + enum chroma_cositing cositing; /* Chroma Subsampling Offset */ + struct spl_rect src_rect; // Source rect + struct spl_rect dst_rect; // Destination Rect + struct spl_rect clip_rect; // Clip rect + enum spl_rotation_angle rotation; // Rotation + bool horizontal_mirror; // Horizontal mirror + int mpc_combine_h; // MPC Horizontal Combine Factor (split_count) + int mpc_combine_v; // MPC Vertical Combine Factor (split_idx) + // Inputs for adaptive scaler - TODO + // struct dc_transfer_func transfer_func; // Transfer function + // enum dc_transfer_func_predefined tf; + enum spl_color_space color_space; // Color Space + unsigned int max_luminance; // Max Luminance TODO: Is determined in dc_hw_sequencer.c is_sdr + bool film_grain_applied; // Film Grain Applied // TODO: To check from where to get this? +}; + +// Basic output information +struct basic_out { + struct spl_size output_size; // Output Size + struct spl_rect dst_rect; // Destination Rect + struct spl_rect src_rect; // Source rect + int odm_combine_factor; // ODM Combine Factor determine by get_odm_splits + enum spl_view_3d view_format; // TODO: View format Check if it is chroma subsampling + bool always_scale; // Is always scale enabled? 
Required for getting SCL_MODE + int max_downscale_src_width; // Required to get optimal no of taps + bool alpha_en; +}; +enum explicit_sharpness { + SHARPNESS_LOW = 0, + SHARPNESS_MID, + SHARPNESS_HIGH +}; +struct adaptive_sharpness { + bool enable; + enum explicit_sharpness sharpness; +}; +enum linear_light_scaling { // convert it in translation logic + LLS_PREF_DONT_CARE = 0, + LLS_PREF_YES, + LLS_PREF_NO +}; +struct spl_funcs { + void (*spl_calc_lb_num_partitions) + (bool alpha_en, + const struct spl_scaler_data *scl_data, + enum lb_memory_config lb_config, + int *num_part_y, + int *num_part_c); +}; + +struct spl_in { + struct basic_out basic_out; + struct basic_in basic_in; + // Basic slice information + int odm_slice_index; // ODM Slice Index using get_odm_split_index + struct spl_taps scaling_quality; // Explicit Scaling Quality + struct spl_funcs *funcs; + // Inputs for isharp and EASF + struct adaptive_sharpness adaptive_sharpness; // Adaptive Sharpness + enum linear_light_scaling lls_pref; // Linear Light Scaling + bool prefer_easf; +}; +// end of SPL inputs + +#endif /* __DC_SPL_TYPES_H__ */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c new file mode 100644 index 000000000000..cf139e9cc20e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -0,0 +1,603 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "../dmub_srv.h" +#include "dmub_reg.h" +#include "dmub_dcn401.h" + +#include "dcn/dcn_4_1_0_offset.h" +#include "dcn/dcn_4_1_0_sh_mask.h" + +#define DCN_BASE__INST0_SEG2 0x000034C0 + +#define BASE_INNER(seg) DCN_BASE__INST0_SEG##seg +#define CTX dmub +#define REGS dmub->regs_dcn401 +#define REG_OFFSET_EXP(reg_name) (BASE(reg##reg_name##_BASE_IDX) + reg##reg_name) + +const struct dmub_srv_dcn401_regs dmub_srv_dcn401_regs = { +#define DMUB_SR(reg) REG_OFFSET_EXP(reg), + { + DMUB_DCN401_REGS() + DMCUB_INTERNAL_REGS() + }, +#undef DMUB_SR + +#define DMUB_SF(reg, field) FD_MASK(reg, field), + { DMUB_DCN401_FIELDS() }, +#undef DMUB_SF + +#define DMUB_SF(reg, field) FD_SHIFT(reg, field), + { DMUB_DCN401_FIELDS() }, +#undef DMUB_SF +}; + +static void dmub_dcn401_get_fb_base_offset(struct dmub_srv *dmub, + uint64_t *fb_base, + uint64_t *fb_offset) +{ + uint32_t tmp; + + if (dmub->fb_base || dmub->fb_offset) { + *fb_base = dmub->fb_base; + *fb_offset = dmub->fb_offset; + return; + } + + REG_GET(DCN_VM_FB_LOCATION_BASE, FB_BASE, &tmp); + *fb_base = (uint64_t)tmp << 24; + + REG_GET(DCN_VM_FB_OFFSET, FB_OFFSET, &tmp); + *fb_offset = (uint64_t)tmp << 24; +} + +static inline void dmub_dcn401_translate_addr(const union dmub_addr *addr_in, + uint64_t fb_base, + uint64_t fb_offset, + union dmub_addr *addr_out) +{ + addr_out->quad_part = addr_in->quad_part - fb_base + fb_offset; +} + +void dmub_dcn401_reset(struct dmub_srv *dmub) +{ + union dmub_gpint_data_register cmd; + const uint32_t timeout = 30; + uint32_t in_reset, scratch, i; + + REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset); + + if (in_reset == 0) { + cmd.bits.status = 1; + cmd.bits.command_code = DMUB_GPINT__STOP_FW; + cmd.bits.param = 0; + + dmub->hw_funcs.set_gpint(dmub, cmd); + + /** + * Timeout covers both the ACK and the wait + * for remaining work to finish. + * + * This is mostly bound by the PHY disable sequence. + * Each register check will be greater than 1us, so + * don't bother using udelay. 
+ */ + + for (i = 0; i < timeout; ++i) { + if (dmub->hw_funcs.is_gpint_acked(dmub, cmd)) + break; + } + + for (i = 0; i < timeout; ++i) { + scratch = dmub->hw_funcs.get_gpint_response(dmub); + if (scratch == DMUB_GPINT__STOP_FW_RESPONSE) + break; + } + + /* Force reset in case we timed out, DMCUB is likely hung. */ + } + + REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); + REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); + REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); + REG_WRITE(DMCUB_INBOX1_RPTR, 0); + REG_WRITE(DMCUB_INBOX1_WPTR, 0); + REG_WRITE(DMCUB_OUTBOX1_RPTR, 0); + REG_WRITE(DMCUB_OUTBOX1_WPTR, 0); + REG_WRITE(DMCUB_OUTBOX0_RPTR, 0); + REG_WRITE(DMCUB_OUTBOX0_WPTR, 0); + REG_WRITE(DMCUB_SCRATCH0, 0); + + /* Clear the GPINT command manually so we don't reset again. */ + cmd.all = 0; + dmub->hw_funcs.set_gpint(dmub, cmd); +} + +void dmub_dcn401_reset_release(struct dmub_srv *dmub) +{ + REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 0); + REG_WRITE(DMCUB_SCRATCH15, dmub->psp_version & 0x001100FF); + REG_UPDATE_2(DMCUB_CNTL, DMCUB_ENABLE, 1, DMCUB_TRACEPORT_EN, 1); + REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 0); +} + +void dmub_dcn401_backdoor_load(struct dmub_srv *dmub, + const struct dmub_window *cw0, + const struct dmub_window *cw1) +{ + union dmub_addr offset; + uint64_t fb_base, fb_offset; + + dmub_dcn401_get_fb_base_offset(dmub, &fb_base, &fb_offset); + + REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1); + + dmub_dcn401_translate_addr(&cw0->offset, fb_base, fb_offset, &offset); + + REG_WRITE(DMCUB_REGION3_CW0_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION3_CW0_OFFSET_HIGH, offset.u.high_part); + REG_WRITE(DMCUB_REGION3_CW0_BASE_ADDRESS, cw0->region.base); + REG_SET_2(DMCUB_REGION3_CW0_TOP_ADDRESS, 0, + DMCUB_REGION3_CW0_TOP_ADDRESS, cw0->region.top, + DMCUB_REGION3_CW0_ENABLE, 1); + + dmub_dcn401_translate_addr(&cw1->offset, fb_base, fb_offset, &offset); + + REG_WRITE(DMCUB_REGION3_CW1_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION3_CW1_OFFSET_HIGH, offset.u.high_part); + REG_WRITE(DMCUB_REGION3_CW1_BASE_ADDRESS, cw1->region.base); + REG_SET_2(DMCUB_REGION3_CW1_TOP_ADDRESS, 0, + DMCUB_REGION3_CW1_TOP_ADDRESS, cw1->region.top, + DMCUB_REGION3_CW1_ENABLE, 1); + + REG_UPDATE_2(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 0, DMCUB_MEM_UNIT_ID, + 0x20); +} + +void dmub_dcn401_backdoor_load_zfb_mode(struct dmub_srv *dmub, + const struct dmub_window *cw0, + const struct dmub_window *cw1) +{ + union dmub_addr offset; + + REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1); + + offset = cw0->offset; + + REG_WRITE(DMCUB_REGION3_CW0_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION3_CW0_OFFSET_HIGH, offset.u.high_part); + REG_WRITE(DMCUB_REGION3_CW0_BASE_ADDRESS, cw0->region.base); + REG_SET_2(DMCUB_REGION3_CW0_TOP_ADDRESS, 0, + DMCUB_REGION3_CW0_TOP_ADDRESS, cw0->region.top, + DMCUB_REGION3_CW0_ENABLE, 1); + + offset = cw1->offset; + + REG_WRITE(DMCUB_REGION3_CW1_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION3_CW1_OFFSET_HIGH, offset.u.high_part); + REG_WRITE(DMCUB_REGION3_CW1_BASE_ADDRESS, cw1->region.base); + REG_SET_2(DMCUB_REGION3_CW1_TOP_ADDRESS, 0, + DMCUB_REGION3_CW1_TOP_ADDRESS, cw1->region.top, + DMCUB_REGION3_CW1_ENABLE, 1); + + REG_UPDATE_2(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 0, DMCUB_MEM_UNIT_ID, + 0x20); +} + +void dmub_dcn401_setup_windows(struct dmub_srv *dmub, + const struct dmub_window *cw2, + const struct dmub_window *cw3, + const struct dmub_window *cw4, + const struct dmub_window *cw5, + const struct dmub_window *cw6, + const struct dmub_window *region6) +{ + union 
dmub_addr offset; + + offset = cw3->offset; + + REG_WRITE(DMCUB_REGION3_CW3_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION3_CW3_OFFSET_HIGH, offset.u.high_part); + REG_WRITE(DMCUB_REGION3_CW3_BASE_ADDRESS, cw3->region.base); + REG_SET_2(DMCUB_REGION3_CW3_TOP_ADDRESS, 0, + DMCUB_REGION3_CW3_TOP_ADDRESS, cw3->region.top, + DMCUB_REGION3_CW3_ENABLE, 1); + + offset = cw4->offset; + + REG_WRITE(DMCUB_REGION3_CW4_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION3_CW4_OFFSET_HIGH, offset.u.high_part); + REG_WRITE(DMCUB_REGION3_CW4_BASE_ADDRESS, cw4->region.base); + REG_SET_2(DMCUB_REGION3_CW4_TOP_ADDRESS, 0, + DMCUB_REGION3_CW4_TOP_ADDRESS, cw4->region.top, + DMCUB_REGION3_CW4_ENABLE, 1); + + offset = cw5->offset; + + REG_WRITE(DMCUB_REGION3_CW5_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION3_CW5_OFFSET_HIGH, offset.u.high_part); + REG_WRITE(DMCUB_REGION3_CW5_BASE_ADDRESS, cw5->region.base); + REG_SET_2(DMCUB_REGION3_CW5_TOP_ADDRESS, 0, + DMCUB_REGION3_CW5_TOP_ADDRESS, cw5->region.top, + DMCUB_REGION3_CW5_ENABLE, 1); + + REG_WRITE(DMCUB_REGION5_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION5_OFFSET_HIGH, offset.u.high_part); + REG_SET_2(DMCUB_REGION5_TOP_ADDRESS, 0, + DMCUB_REGION5_TOP_ADDRESS, + cw5->region.top - cw5->region.base - 1, + DMCUB_REGION5_ENABLE, 1); + + offset = cw6->offset; + + REG_WRITE(DMCUB_REGION3_CW6_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION3_CW6_OFFSET_HIGH, offset.u.high_part); + REG_WRITE(DMCUB_REGION3_CW6_BASE_ADDRESS, cw6->region.base); + REG_SET_2(DMCUB_REGION3_CW6_TOP_ADDRESS, 0, + DMCUB_REGION3_CW6_TOP_ADDRESS, cw6->region.top, + DMCUB_REGION3_CW6_ENABLE, 1); + + offset = region6->offset; + + REG_WRITE(DMCUB_REGION6_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION6_OFFSET_HIGH, offset.u.high_part); + REG_SET_2(DMCUB_REGION6_TOP_ADDRESS, 0, + DMCUB_REGION6_TOP_ADDRESS, + region6->region.top - region6->region.base - 1, + DMCUB_REGION6_ENABLE, 1); +} + +void dmub_dcn401_setup_mailbox(struct dmub_srv *dmub, + const struct dmub_region *inbox1) +{ + REG_WRITE(DMCUB_INBOX1_BASE_ADDRESS, inbox1->base); + REG_WRITE(DMCUB_INBOX1_SIZE, inbox1->top - inbox1->base); +} + +uint32_t dmub_dcn401_get_inbox1_wptr(struct dmub_srv *dmub) +{ + return REG_READ(DMCUB_INBOX1_WPTR); +} + +uint32_t dmub_dcn401_get_inbox1_rptr(struct dmub_srv *dmub) +{ + return REG_READ(DMCUB_INBOX1_RPTR); +} + +void dmub_dcn401_set_inbox1_wptr(struct dmub_srv *dmub, uint32_t wptr_offset) +{ + REG_WRITE(DMCUB_INBOX1_WPTR, wptr_offset); +} + +void dmub_dcn401_setup_out_mailbox(struct dmub_srv *dmub, + const struct dmub_region *outbox1) +{ + REG_WRITE(DMCUB_OUTBOX1_BASE_ADDRESS, outbox1->base); + REG_WRITE(DMCUB_OUTBOX1_SIZE, outbox1->top - outbox1->base); +} + +uint32_t dmub_dcn401_get_outbox1_wptr(struct dmub_srv *dmub) +{ + /** + * outbox1 wptr register is accessed without locks (dal & dc) + * and to be called only by dmub_srv_stat_get_notification() + */ + return REG_READ(DMCUB_OUTBOX1_WPTR); +} + +void dmub_dcn401_set_outbox1_rptr(struct dmub_srv *dmub, uint32_t rptr_offset) +{ + /** + * outbox1 rptr register is accessed without locks (dal & dc) + * and to be called only by dmub_srv_stat_get_notification() + */ + REG_WRITE(DMCUB_OUTBOX1_RPTR, rptr_offset); +} + +bool dmub_dcn401_is_hw_init(struct dmub_srv *dmub) +{ + union dmub_fw_boot_status status; + uint32_t is_hw_init; + + status.all = REG_READ(DMCUB_SCRATCH0); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_hw_init); + + return is_hw_init != 0 && status.bits.dal_fw; +} + +bool dmub_dcn401_is_supported(struct dmub_srv *dmub) +{ 
+ uint32_t supported = 0; + + REG_GET(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE, &supported); + + return supported; +} + +void dmub_dcn401_set_gpint(struct dmub_srv *dmub, + union dmub_gpint_data_register reg) +{ + REG_WRITE(DMCUB_GPINT_DATAIN1, reg.all); +} + +bool dmub_dcn401_is_gpint_acked(struct dmub_srv *dmub, + union dmub_gpint_data_register reg) +{ + union dmub_gpint_data_register test; + + reg.bits.status = 0; + test.all = REG_READ(DMCUB_GPINT_DATAIN1); + + return test.all == reg.all; +} + +uint32_t dmub_dcn401_get_gpint_response(struct dmub_srv *dmub) +{ + return REG_READ(DMCUB_SCRATCH7); +} + +uint32_t dmub_dcn401_get_gpint_dataout(struct dmub_srv *dmub) +{ + uint32_t dataout = REG_READ(DMCUB_GPINT_DATAOUT); + + REG_UPDATE(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN, 0); + + REG_WRITE(DMCUB_GPINT_DATAOUT, 0); + REG_UPDATE(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK, 1); + REG_UPDATE(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK, 0); + + REG_UPDATE(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN, 1); + + return dataout; +} + +union dmub_fw_boot_status dmub_dcn401_get_fw_boot_status(struct dmub_srv *dmub) +{ + union dmub_fw_boot_status status; + + status.all = REG_READ(DMCUB_SCRATCH0); + return status; +} + +void dmub_dcn401_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params) +{ + union dmub_fw_boot_options boot_options = {0}; + + boot_options.bits.z10_disable = params->disable_z10; + + boot_options.bits.skip_phy_access = params->disallow_phy_access; + + REG_WRITE(DMCUB_SCRATCH14, boot_options.all); +} + +void dmub_dcn401_skip_dmub_panel_power_sequence(struct dmub_srv *dmub, bool skip) +{ + union dmub_fw_boot_options boot_options; + boot_options.all = REG_READ(DMCUB_SCRATCH14); + boot_options.bits.skip_phy_init_panel_sequence = skip; + REG_WRITE(DMCUB_SCRATCH14, boot_options.all); +} + +void dmub_dcn401_setup_outbox0(struct dmub_srv *dmub, + const struct dmub_region *outbox0) +{ + REG_WRITE(DMCUB_OUTBOX0_BASE_ADDRESS, outbox0->base); + + REG_WRITE(DMCUB_OUTBOX0_SIZE, outbox0->top - outbox0->base); +} + +uint32_t dmub_dcn401_get_outbox0_wptr(struct dmub_srv *dmub) +{ + return REG_READ(DMCUB_OUTBOX0_WPTR); +} + +void dmub_dcn401_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset) +{ + REG_WRITE(DMCUB_OUTBOX0_RPTR, rptr_offset); +} + +uint32_t dmub_dcn401_get_current_time(struct dmub_srv *dmub) +{ + return REG_READ(DMCUB_TIMER_CURRENT); +} + +void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) +{ + uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; + uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; + + if (!dmub || !diag_data) + return; + + memset(diag_data, 0, sizeof(*diag_data)); + + diag_data->dmcub_version = dmub->fw_version; + + diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0); + diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1); + diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2); + diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3); + diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4); + diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5); + diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6); + diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7); + diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8); + diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9); + diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10); + diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11); + diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12); + diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13); + diag_data->scratch[14] = 
REG_READ(DMCUB_SCRATCH14); + diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15); + diag_data->scratch[16] = REG_READ(DMCUB_SCRATCH16); + + diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); + diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); + diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); + + diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); + diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); + diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); + + diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); + diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); + diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); + diag_data->is_dmcub_enabled = is_dmub_enabled; + + REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); + diag_data->is_dmcub_soft_reset = is_soft_reset; + + REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset); + diag_data->is_dmcub_secure_reset = is_sec_reset; + + REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); + diag_data->is_traceport_en = is_traceport_enabled; + + REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled); + diag_data->is_cw0_enabled = is_cw0_enabled; + + REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled); + diag_data->is_cw6_enabled = is_cw6_enabled; + + diag_data->gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0); + diag_data->timeout_info = dmub->debug; +} +void dmub_dcn401_configure_dmub_in_system_memory(struct dmub_srv *dmub) +{ + /* DMCUB_REGION3_TMR_AXI_SPACE values: + * 0b011 (0x3) - FB physical address + * 0b100 (0x4) - GPU virtual address + * + * Default value is 0x3 (FB Physical address for TMR). When programming + * DMUB to be in system memory, change to 0x4. The system memory allocated + * is accessible by both GPU and CPU, so we use GPU virtual address. + */ + REG_WRITE(DMCUB_REGION3_TMR_AXI_SPACE, 0x4); +} + +void dmub_dcn401_send_inbox0_cmd(struct dmub_srv *dmub, union dmub_inbox0_data_register data) +{ + REG_WRITE(DMCUB_INBOX0_WPTR, data.inbox0_cmd_common.all); +} + +void dmub_dcn401_clear_inbox0_ack_register(struct dmub_srv *dmub) +{ + REG_WRITE(DMCUB_SCRATCH17, 0); +} + +uint32_t dmub_dcn401_read_inbox0_ack_register(struct dmub_srv *dmub) +{ + return REG_READ(DMCUB_SCRATCH17); +} + +void dmub_dcn401_send_reg_inbox0_cmd_msg(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd) +{ + uint32_t *dwords = (uint32_t *)cmd; + + static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); + + REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[0]); + REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[1]); + REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[2]); + REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[3]); + REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[4]); + REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[5]); + REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[6]); + REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[7]); + REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[8]); + REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[9]); + REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[10]); + REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[11]); + REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[12]); + REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[13]); + REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[14]); + /* writing to INBOX RDY register will trigger DMUB REG INBOX0 RDY + * interrupt. 
+ */ + REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[15]); +} + +uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub) +{ + uint32_t status; + + REG_GET(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_STAT, &status); + return status; +} + +void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd) +{ + uint32_t *dwords = (uint32_t *)cmd; + + static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); + + dwords[0] = REG_READ(DMCUB_REG_INBOX0_MSG0); + dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG1); + dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG2); + dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG3); + dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG4); + dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG5); + dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG6); + dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG7); + dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG8); + dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG9); + dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG10); + dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG11); + dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG12); + dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG13); + dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG14); + dwords[15] = REG_READ(DMCUB_REG_INBOX0_RSP); +} + +void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) +{ + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 1); + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 0); +} + +void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub) +{ + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK, 1); + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK, 0); +} + +void dmub_dcn401_read_reg_outbox0_msg(struct dmub_srv *dmub, uint32_t *msg) +{ + *msg = REG_READ(DMCUB_REG_OUTBOX0_MSG0); +} + +void dmub_dcn401_write_reg_outbox0_rsp(struct dmub_srv *dmub, uint32_t *rsp) +{ + REG_WRITE(DMCUB_REG_OUTBOX0_RSP, *rsp); +} + +uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub) +{ + uint32_t status; + + REG_GET(DMCUB_INTERRUPT_STATUS, DMCUB_REG_OUTBOX0_RSP_INT_STAT, &status); + return status; +} + +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) +{ + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); +} + +void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable) +{ + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_EN, enable ? 1:0); +} + +uint32_t dmub_dcn401_read_reg_outbox0_rdy_int_status(struct dmub_srv *dmub) +{ + uint32_t status; + + REG_GET(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_STAT, &status); + return status; +} diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h new file mode 100644 index 000000000000..4c8843b79695 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef _DMUB_DCN401_H_ +#define _DMUB_DCN401_H_ + +#include "dmub_dcn31.h" + +struct dmub_srv; + +/* DCN401 register definitions. 
*/ + +#define DMUB_DCN401_REGS() \ + DMUB_SR(DMCUB_CNTL) \ + DMUB_SR(DMCUB_CNTL2) \ + DMUB_SR(DMCUB_SEC_CNTL) \ + DMUB_SR(DMCUB_INBOX0_SIZE) \ + DMUB_SR(DMCUB_INBOX0_RPTR) \ + DMUB_SR(DMCUB_INBOX0_WPTR) \ + DMUB_SR(DMCUB_INBOX1_BASE_ADDRESS) \ + DMUB_SR(DMCUB_INBOX1_SIZE) \ + DMUB_SR(DMCUB_INBOX1_RPTR) \ + DMUB_SR(DMCUB_INBOX1_WPTR) \ + DMUB_SR(DMCUB_OUTBOX0_BASE_ADDRESS) \ + DMUB_SR(DMCUB_OUTBOX0_SIZE) \ + DMUB_SR(DMCUB_OUTBOX0_RPTR) \ + DMUB_SR(DMCUB_OUTBOX0_WPTR) \ + DMUB_SR(DMCUB_OUTBOX1_BASE_ADDRESS) \ + DMUB_SR(DMCUB_OUTBOX1_SIZE) \ + DMUB_SR(DMCUB_OUTBOX1_RPTR) \ + DMUB_SR(DMCUB_OUTBOX1_WPTR) \ + DMUB_SR(DMCUB_REGION3_CW0_OFFSET) \ + DMUB_SR(DMCUB_REGION3_CW1_OFFSET) \ + DMUB_SR(DMCUB_REGION3_CW2_OFFSET) \ + DMUB_SR(DMCUB_REGION3_CW3_OFFSET) \ + DMUB_SR(DMCUB_REGION3_CW4_OFFSET) \ + DMUB_SR(DMCUB_REGION3_CW5_OFFSET) \ + DMUB_SR(DMCUB_REGION3_CW6_OFFSET) \ + DMUB_SR(DMCUB_REGION3_CW7_OFFSET) \ + DMUB_SR(DMCUB_REGION3_CW0_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION3_CW1_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION3_CW2_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION3_CW3_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION3_CW4_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION3_CW5_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION3_CW6_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION3_CW7_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION3_CW0_BASE_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW1_BASE_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW2_BASE_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW3_BASE_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW4_BASE_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW5_BASE_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW6_BASE_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW7_BASE_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW0_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW1_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW2_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW3_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW4_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW5_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW6_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION3_CW7_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION4_OFFSET) \ + DMUB_SR(DMCUB_REGION4_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION4_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION5_OFFSET) \ + DMUB_SR(DMCUB_REGION5_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION5_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION6_OFFSET) \ + DMUB_SR(DMCUB_REGION6_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION6_TOP_ADDRESS) \ + DMUB_SR(DMCUB_SCRATCH0) \ + DMUB_SR(DMCUB_SCRATCH1) \ + DMUB_SR(DMCUB_SCRATCH2) \ + DMUB_SR(DMCUB_SCRATCH3) \ + DMUB_SR(DMCUB_SCRATCH4) \ + DMUB_SR(DMCUB_SCRATCH5) \ + DMUB_SR(DMCUB_SCRATCH6) \ + DMUB_SR(DMCUB_SCRATCH7) \ + DMUB_SR(DMCUB_SCRATCH8) \ + DMUB_SR(DMCUB_SCRATCH9) \ + DMUB_SR(DMCUB_SCRATCH10) \ + DMUB_SR(DMCUB_SCRATCH11) \ + DMUB_SR(DMCUB_SCRATCH12) \ + DMUB_SR(DMCUB_SCRATCH13) \ + DMUB_SR(DMCUB_SCRATCH14) \ + DMUB_SR(DMCUB_SCRATCH15) \ + DMUB_SR(DMCUB_SCRATCH16) \ + DMUB_SR(DMCUB_SCRATCH17) \ + DMUB_SR(DMCUB_GPINT_DATAIN0) \ + DMUB_SR(DMCUB_GPINT_DATAIN1) \ + DMUB_SR(DMCUB_GPINT_DATAOUT) \ + DMUB_SR(CC_DC_PIPE_DIS) \ + DMUB_SR(MMHUBBUB_SOFT_RESET) \ + DMUB_SR(DCN_VM_FB_LOCATION_BASE) \ + DMUB_SR(DCN_VM_FB_OFFSET) \ + DMUB_SR(DMCUB_TIMER_CURRENT) \ + DMUB_SR(DMCUB_INST_FETCH_FAULT_ADDR) \ + DMUB_SR(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR) \ + DMUB_SR(DMCUB_DATA_WRITE_FAULT_ADDR) \ + DMUB_SR(DMCUB_REGION3_TMR_AXI_SPACE) \ + DMUB_SR(DMCUB_INTERRUPT_ENABLE) \ + DMUB_SR(DMCUB_INTERRUPT_ACK) \ + DMUB_SR(DMCUB_INTERRUPT_STATUS) \ + DMUB_SR(DMCUB_REG_INBOX0_RDY) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG0) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG1) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG2) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG3) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG4) \ + 
DMUB_SR(DMCUB_REG_INBOX0_MSG5) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG6) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG7) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG8) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG9) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG10) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG11) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG12) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG13) \ + DMUB_SR(DMCUB_REG_INBOX0_MSG14) \ + DMUB_SR(DMCUB_REG_INBOX0_RSP) \ + DMUB_SR(DMCUB_REG_OUTBOX0_RDY) \ + DMUB_SR(DMCUB_REG_OUTBOX0_MSG0) \ + DMUB_SR(DMCUB_REG_OUTBOX0_RSP) \ + DMUB_SR(HOST_INTERRUPT_CSR) + +#define DMUB_DCN401_FIELDS() \ + DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \ + DMUB_SF(DMCUB_CNTL, DMCUB_TRACEPORT_EN) \ + DMUB_SF(DMCUB_CNTL2, DMCUB_SOFT_RESET) \ + DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET) \ + DMUB_SF(DMCUB_SEC_CNTL, DMCUB_MEM_UNIT_ID) \ + DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS) \ + DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE) \ + DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_ENABLE) \ + DMUB_SF(DMCUB_REGION3_CW2_TOP_ADDRESS, DMCUB_REGION3_CW2_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION3_CW2_TOP_ADDRESS, DMCUB_REGION3_CW2_ENABLE) \ + DMUB_SF(DMCUB_REGION3_CW3_TOP_ADDRESS, DMCUB_REGION3_CW3_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION3_CW3_TOP_ADDRESS, DMCUB_REGION3_CW3_ENABLE) \ + DMUB_SF(DMCUB_REGION3_CW4_TOP_ADDRESS, DMCUB_REGION3_CW4_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION3_CW4_TOP_ADDRESS, DMCUB_REGION3_CW4_ENABLE) \ + DMUB_SF(DMCUB_REGION3_CW5_TOP_ADDRESS, DMCUB_REGION3_CW5_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION3_CW5_TOP_ADDRESS, DMCUB_REGION3_CW5_ENABLE) \ + DMUB_SF(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE) \ + DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_ENABLE) \ + DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \ + DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_ENABLE) \ + DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_ENABLE) \ + DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \ + DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \ + DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \ + DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) \ + DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) \ + DMUB_SF(DMCUB_REGION3_TMR_AXI_SPACE, DMCUB_REGION3_TMR_AXI_SPACE) \ + DMUB_SF(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN) \ + DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) \ + DMUB_SF(DMCUB_INTERRUPT_STATUS, DMCUB_REG_OUTBOX0_RSP_INT_STAT) \ + DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK) \ + DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_STAT) \ + DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN) \ + DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK) \ + DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_STAT) \ + DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_EN) + +struct dmub_srv_dcn401_reg_offset { +#define DMUB_SR(reg) uint32_t reg; + DMUB_DCN401_REGS() + DMCUB_INTERNAL_REGS() +#undef DMUB_SR +}; + +struct dmub_srv_dcn401_reg_shift { +#define DMUB_SF(reg, field) uint8_t reg##__##field; + DMUB_DCN401_FIELDS() +#undef DMUB_SF +}; + +struct dmub_srv_dcn401_reg_mask { +#define DMUB_SF(reg, field) uint32_t 
reg##__##field; + DMUB_DCN401_FIELDS() +#undef DMUB_SF +}; + +struct dmub_srv_dcn401_regs { + const struct dmub_srv_dcn401_reg_offset offset; + const struct dmub_srv_dcn401_reg_mask mask; + const struct dmub_srv_dcn401_reg_shift shift; +}; + +extern const struct dmub_srv_dcn401_regs dmub_srv_dcn401_regs; + +void dmub_dcn401_reset(struct dmub_srv *dmub); + +void dmub_dcn401_reset_release(struct dmub_srv *dmub); + +void dmub_dcn401_backdoor_load(struct dmub_srv *dmub, + const struct dmub_window *cw0, + const struct dmub_window *cw1); + +void dmub_dcn401_backdoor_load_zfb_mode(struct dmub_srv *dmub, + const struct dmub_window *cw0, + const struct dmub_window *cw1); + +void dmub_dcn401_setup_windows(struct dmub_srv *dmub, + const struct dmub_window *cw2, + const struct dmub_window *cw3, + const struct dmub_window *cw4, + const struct dmub_window *cw5, + const struct dmub_window *cw6, + const struct dmub_window *region6); + +void dmub_dcn401_setup_mailbox(struct dmub_srv *dmub, + const struct dmub_region *inbox1); + +uint32_t dmub_dcn401_get_inbox1_wptr(struct dmub_srv *dmub); + +uint32_t dmub_dcn401_get_inbox1_rptr(struct dmub_srv *dmub); + +void dmub_dcn401_set_inbox1_wptr(struct dmub_srv *dmub, uint32_t wptr_offset); + +void dmub_dcn401_setup_out_mailbox(struct dmub_srv *dmub, + const struct dmub_region *outbox1); + +uint32_t dmub_dcn401_get_outbox1_wptr(struct dmub_srv *dmub); + +void dmub_dcn401_set_outbox1_rptr(struct dmub_srv *dmub, uint32_t rptr_offset); + +bool dmub_dcn401_is_hw_init(struct dmub_srv *dmub); + +bool dmub_dcn401_is_supported(struct dmub_srv *dmub); + +void dmub_dcn401_set_gpint(struct dmub_srv *dmub, + union dmub_gpint_data_register reg); + +bool dmub_dcn401_is_gpint_acked(struct dmub_srv *dmub, + union dmub_gpint_data_register reg); + +uint32_t dmub_dcn401_get_gpint_response(struct dmub_srv *dmub); + +uint32_t dmub_dcn401_get_gpint_dataout(struct dmub_srv *dmub); + +void dmub_dcn401_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params); + +void dmub_dcn401_skip_dmub_panel_power_sequence(struct dmub_srv *dmub, bool skip); + +union dmub_fw_boot_status dmub_dcn401_get_fw_boot_status(struct dmub_srv *dmub); + +void dmub_dcn401_setup_outbox0(struct dmub_srv *dmub, + const struct dmub_region *outbox0); + +uint32_t dmub_dcn401_get_outbox0_wptr(struct dmub_srv *dmub); + +void dmub_dcn401_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset); + +uint32_t dmub_dcn401_get_current_time(struct dmub_srv *dmub); + +void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data); + +void dmub_dcn401_configure_dmub_in_system_memory(struct dmub_srv *dmub); +void dmub_dcn401_send_inbox0_cmd(struct dmub_srv *dmub, union dmub_inbox0_data_register data); +void dmub_dcn401_clear_inbox0_ack_register(struct dmub_srv *dmub); +uint32_t dmub_dcn401_read_inbox0_ack_register(struct dmub_srv *dmub); + +void dmub_dcn401_send_reg_inbox0_cmd_msg(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd); +uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub); +void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd); +void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); +void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub); +void dmub_dcn401_read_reg_outbox0_msg(struct dmub_srv *dmub, uint32_t *msg); +void dmub_dcn401_write_reg_outbox0_rsp(struct dmub_srv *dmub, uint32_t *msg); +uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv 
*dmub); +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); +void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable); +uint32_t dmub_dcn401_read_reg_outbox0_rdy_int_status(struct dmub_srv *dmub); + +#endif /* _DMUB_DCN401_H_ */