diff options
author | Dave Airlie <airlied@redhat.com> | 2024-09-11 04:21:55 +0300 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2024-09-11 04:22:47 +0300 |
commit | 741d73f587d5cc86db5e65cc107e031263302616 (patch) | |
tree | d4c1349b5b9a0648edac520123461cdc32b2fc87 | |
parent | 32bd3eb5fbab954e68adba8c0b6a43cf03605c93 (diff) | |
parent | 7a199557643e993d4e7357860624b8aa5d8f4340 (diff) | |
download | linux-741d73f587d5cc86db5e65cc107e031263302616.tar.xz |
Merge tag 'amd-drm-next-6.12-2024-09-06' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.12-2024-09-06:
amdgpu:
- IPS updates
- Post divider fix
- DML2 updates
- Misc static checker fixes
- DCN 3.5 fixes
- Replay fixes
- DMCUB updates
- SWSMU fixes
- DP MST fixes
- Add debug flag for per queue resets
- devcoredump updates
- SR-IOV fixes
- MES fixes
- Always allocate cleared VRAM for GEM
- Pipe reset for GC 9.4.3
- ODM policy fixes
- Per queue reset support for GC 10
- Per queue reset support for GC 11
- Per queue reset support for GC 12
- Display flickering fixes
- MPO fixes
- Display sharpening updates
amdkfd:
- SVM fix for IH for APUs
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240906211008.3072097-1-alexander.deucher@amd.com
75 files changed, 6024 insertions, 1359 deletions
diff --git a/Documentation/gpu/amdgpu/driver-core.rst b/Documentation/gpu/amdgpu/driver-core.rst index 467e6843aef6..32723a925377 100644 --- a/Documentation/gpu/amdgpu/driver-core.rst +++ b/Documentation/gpu/amdgpu/driver-core.rst @@ -179,4 +179,4 @@ IP Blocks :doc: IP Blocks .. kernel-doc:: drivers/gpu/drm/amd/include/amd_shared.h - :identifiers: amd_ip_block_type amd_ip_funcs + :identifiers: amd_ip_block_type amd_ip_funcs DC_DEBUG_MASK diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7fe41a3c2541..e095572458cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1164,6 +1164,7 @@ struct amdgpu_device { bool debug_disable_soft_recovery; bool debug_use_vram_fw_buf; bool debug_enable_ras_aca; + bool debug_exp_resets; bool enforce_isolation[MAX_XCP]; /* Added this mutex for cleaner shader isolation between GFX and compute processes */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index c63528a4e894..1254a43ec96b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1151,6 +1151,10 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, uint32_t low, high; uint64_t queue_addr = 0; + if (!adev->debug_exp_resets && + !adev->gfx.num_gfx_rings) + return 0; + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); amdgpu_gfx_rlc_enter_safe_mode(adev, inst); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index cf2b4dd4d865..5ac59b62020c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -28,8 +28,8 @@ #include "atom.h" #ifndef CONFIG_DEV_COREDUMP -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) +void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, + bool vram_lost, struct amdgpu_job *job) { } #else @@ -315,7 +315,9 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, } } - if (coredump->reset_vram_lost) + if (coredump->skip_vram_check) + drm_printf(&p, "VRAM lost check is skipped!\n"); + else if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); return count - iter.remain; @@ -326,12 +328,11 @@ static void amdgpu_devcoredump_free(void *data) kfree(data); } -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) +void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, + bool vram_lost, struct amdgpu_job *job) { - struct amdgpu_coredump_info *coredump; struct drm_device *dev = adev_to_drm(adev); - struct amdgpu_job *job = reset_context->job; + struct amdgpu_coredump_info *coredump; struct drm_sched_job *s_job; coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); @@ -341,11 +342,12 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, return; } + coredump->skip_vram_check = skip_vram_check; coredump->reset_vram_lost = vram_lost; - if (reset_context->job && reset_context->job->vm) { + if (job && job->vm) { + struct amdgpu_vm *vm = job->vm; struct amdgpu_task_info *ti; - struct amdgpu_vm *vm = reset_context->job->vm; ti = amdgpu_vm_get_task_info_vm(vm); if (ti) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h index 52459512cb2b..ef9772c6bcc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h @@ -26,7 +26,6 @@ #define __AMDGPU_DEV_COREDUMP_H__ #include "amdgpu.h" -#include "amdgpu_reset.h" #ifdef CONFIG_DEV_COREDUMP @@ -36,12 +35,12 @@ struct amdgpu_coredump_info { struct amdgpu_device *adev; struct amdgpu_task_info reset_task_info; struct timespec64 reset_time; + bool skip_vram_check; bool reset_vram_lost; struct amdgpu_ring *ring; }; #endif -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context); - +void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, + bool vram_lost, struct amdgpu_job *job); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 49ef22dcf7fb..f4628412dac4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4531,6 +4531,9 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) { dev_info(adev->dev, "amdgpu: finishing device.\n"); flush_delayed_work(&adev->delayed_init_work); + + if (adev->mman.initialized) + drain_workqueue(adev->mman.bdev.wq); adev->shutdown = true; /* make sure IB test finished before entering exclusive mode @@ -4551,9 +4554,6 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) } amdgpu_fence_driver_hw_fini(adev); - if (adev->mman.initialized) - drain_workqueue(adev->mman.bdev.wq); - if (adev->pm.sysfs_initialized) amdgpu_pm_sysfs_fini(adev); if (adev->ucode_sysfs_en) @@ -5489,7 +5489,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) - amdgpu_coredump(tmp_adev, vram_lost, reset_context); + amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5dd39e6c6223..8dee7c62c801 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -131,6 +131,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4), + AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -2199,6 +2200,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: enable RAS ACA\n"); adev->debug_enable_ras_aca = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_EXP_RESETS) { + pr_info("debug: enable experimental reset features\n"); + adev->debug_exp_resets = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index aad2027e5c7c..0e617dff8765 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -348,6 +348,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + /* always clear VRAM */ + flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; + /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b4efeef848de..b779d47a546a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -660,7 +660,7 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) uint64_t queue_mask = 0; int r, i, j; - if (adev->enable_mes) + if (adev->mes.enable_legacy_queue_map) return amdgpu_gfx_mes_enable_kcq(adev, xcc_id); if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) @@ -722,7 +722,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) amdgpu_device_flush_hdp(adev, NULL); - if (adev->enable_mes) { + if (adev->mes.enable_legacy_queue_map) { for (i = 0; i < adev->gfx.num_gfx_rings; i++) { j = i + xcc_id * adev->gfx.num_gfx_rings; r = amdgpu_mes_map_legacy_queue(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 597489dea114..ad6bf5d4e0a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -30,6 +30,60 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_reset.h" +#include "amdgpu_dev_coredump.h" +#include "amdgpu_xgmi.h" + +static void amdgpu_job_do_core_dump(struct amdgpu_device *adev, + struct amdgpu_job *job) +{ + int i; + + dev_info(adev->dev, "Dumping IP State\n"); + for (i = 0; i < adev->num_ip_blocks; i++) + if (adev->ip_blocks[i].version->funcs->dump_ip_state) + adev->ip_blocks[i].version->funcs + ->dump_ip_state((void *)adev); + dev_info(adev->dev, "Dumping IP State Completed\n"); + + amdgpu_coredump(adev, true, false, job); +} + +static void amdgpu_job_core_dump(struct amdgpu_device *adev, + struct amdgpu_job *job) +{ + struct list_head device_list, *device_list_handle = NULL; + struct amdgpu_device *tmp_adev = NULL; + struct amdgpu_hive_info *hive = NULL; + + if (!amdgpu_sriov_vf(adev)) + hive = amdgpu_get_xgmi_hive(adev); + if (hive) + mutex_lock(&hive->hive_lock); + /* + * Reuse the logic in amdgpu_device_gpu_recover() to build list of + * devices for code dump + */ + INIT_LIST_HEAD(&device_list); + if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + list_add_tail(&tmp_adev->reset_list, &device_list); + if (!list_is_first(&adev->reset_list, &device_list)) + list_rotate_to_front(&adev->reset_list, &device_list); + device_list_handle = &device_list; + } else { + list_add_tail(&adev->reset_list, &device_list); + device_list_handle = &device_list; + } + + /* Do the coredump for each device */ + list_for_each_entry(tmp_adev, device_list_handle, reset_list) + amdgpu_job_do_core_dump(tmp_adev, job); + + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); + } +} static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) { @@ -48,9 +102,14 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) return DRM_GPU_SCHED_STAT_ENODEV; } - adev->job_hang = true; + /* + * Do the coredump immediately after a job timeout to get a very + * close dump/snapshot/representation of GPU's current error status + */ + amdgpu_job_core_dump(adev, job); + if (amdgpu_gpu_recovery && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { dev_err(adev->dev, "ring %s timeout, but soft recovered\n", @@ -101,6 +160,12 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) reset_context.src = AMDGPU_RESET_SRC_JOB; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + /* + * To avoid an unnecessary extra coredump, as we have already + * got the very close representation of GPU's error status + */ + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); + r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context); if (r) dev_err(adev->dev, "GPU Recovery Failed: %d\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 44c74a08987d..f7d5d4f08a53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -826,6 +826,24 @@ int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id) return 0; } +int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, + int me_id, int pipe_id, int queue_id, int vmid) +{ + struct mes_reset_queue_input queue_input; + int r; + + queue_input.use_mmio = true; + queue_input.me_id = me_id; + queue_input.pipe_id = pipe_id; + queue_input.queue_id = queue_id; + queue_input.vmid = vmid; + r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to reset hardware queue by mmio, queue id = %d\n", + queue_id); + return r; +} + int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -873,7 +891,8 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned int vmid) + unsigned int vmid, + bool use_mmio) { struct mes_reset_legacy_queue_input queue_input; int r; @@ -882,11 +901,13 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, queue_input.queue_type = ring->funcs->type; queue_input.doorbell_offset = ring->doorbell_index; + queue_input.me_id = ring->me; queue_input.pipe_id = ring->pipe; queue_input.queue_id = ring->queue; queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); queue_input.wptr_addr = ring->wptr_gpu_addr; queue_input.vmid = vmid; + queue_input.use_mmio = use_mmio; r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index a5b1ea60cac8..96788c0f42f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -75,6 +75,7 @@ struct amdgpu_mes { uint32_t sched_version; uint32_t kiq_version; + bool enable_legacy_queue_map; uint32_t total_max_queue; uint32_t max_doorbell_slices; @@ -251,6 +252,13 @@ struct mes_remove_queue_input { struct mes_reset_queue_input { uint32_t doorbell_offset; uint64_t gang_context_addr; + bool use_mmio; + uint32_t queue_type; + uint32_t me_id; + uint32_t pipe_id; + uint32_t queue_id; + uint32_t xcc_id; + uint32_t vmid; }; struct mes_map_legacy_queue_input { @@ -287,6 +295,8 @@ struct mes_resume_gang_input { struct mes_reset_legacy_queue_input { uint32_t queue_type; uint32_t doorbell_offset; + bool use_mmio; + uint32_t me_id; uint32_t pipe_id; uint32_t queue_id; uint64_t mqd_addr; @@ -396,6 +406,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, int *queue_id); int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id); +int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, + int me_id, int pipe_id, int queue_id, int vmid); int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); @@ -405,7 +417,8 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, u64 gpu_addr, u64 seq); int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned int vmid); + unsigned int vmid, + bool use_mmio); uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); int amdgpu_mes_wreg(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ca983a014ba0..45ed97038df0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6692,13 +6692,13 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, return 0; } -static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) +static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v10_gfx_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.gfx_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -6750,7 +6750,7 @@ static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v10_0_gfx_init_queue(ring); + r = gfx_v10_0_kgq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -7030,13 +7030,13 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) { struct amdgpu_device *adev = ring->adev; struct v10_compute_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!restore && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -7098,7 +7098,7 @@ static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v10_0_kcq_init_queue(ring); + r = gfx_v10_0_kcq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -8949,7 +8949,9 @@ static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring, value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); WREG32_SOC15(GC, 0, mmSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void @@ -9416,6 +9418,156 @@ static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_write(ring, ring->funcs->nop); } +static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + u64 addr; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7 + kiq->pmf->map_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + addr = amdgpu_bo_gpu_offset(ring->mqd_obj) + + offsetof(struct v10_gfx_mqd, cp_gfx_hqd_active); + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + if (ring->pipe == 0) + tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << ring->queue); + else + tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << ring->queue); + + gfx_v10_0_ring_emit_wreg(kiq_ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); + gfx_v10_0_wait_reg_mem(kiq_ring, 0, 1, 0, + lower_32_bits(addr), upper_32_bits(addr), + 0, 1, 0x20); + gfx_v10_0_ring_emit_reg_wait(kiq_ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff); + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + DRM_ERROR("fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v10_0_kgq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + DRM_ERROR("fail to unresv mqd_obj\n"); + return r; + } + + return amdgpu_ring_test_ring(ring); +} + +static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + int i, r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, + 0, 0); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + /* make sure dequeue is complete*/ + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactivate\n"); + return r; + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v10_0_kcq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); + return r; + } + + spin_lock_irqsave(&kiq->ring_lock, flags); + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + return amdgpu_ring_test_ring(ring); +} + static void gfx_v10_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -9619,6 +9771,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, + .reset = gfx_v10_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { @@ -9655,6 +9808,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, + .reset = gfx_v10_0_reset_kcq, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index ee8604722467..d3e8be82a172 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -3984,13 +3984,13 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, return 0; } -static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) +static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v11_gfx_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.gfx_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -4026,7 +4026,7 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v11_0_gfx_init_queue(ring); + r = gfx_v11_0_kgq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -4321,13 +4321,13 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v11_compute_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -4391,7 +4391,7 @@ static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v11_0_kcq_init_queue(ring); + r = gfx_v11_0_kcq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -4781,7 +4781,7 @@ static int gfx_v11_0_soft_reset(void *handle) int r, i, j, k; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfx_v11_0_set_safe_mode(adev, 0); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); @@ -4900,7 +4900,7 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_unset_safe_mode(adev, 0); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return gfx_v11_0_cp_resume(adev); } @@ -5923,6 +5923,9 @@ static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; + if (adev->enable_mes) + return -EINVAL; + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -6088,7 +6091,9 @@ static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); WREG32_SOC15(GC, 0, regSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void @@ -6541,6 +6546,99 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); + if (r) + return r; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v11_0_kgq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); + return r; + } + + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kgq\n"); + return r; + } + + return amdgpu_ring_test_ring(ring); +} + +static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int i, r = 0; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + + /* make sure dequeue is complete*/ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactivate\n"); + return r; + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v11_0_kcq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); + return r; + } + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kcq\n"); + return r; + } + + return amdgpu_ring_test_ring(ring); +} + static void gfx_v11_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -6742,6 +6840,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, + .reset = gfx_v11_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { @@ -6779,6 +6878,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, + .reset = gfx_v11_0_reset_kcq, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 54059cbcfc08..d1357c01eb39 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -2916,13 +2916,13 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, return 0; } -static int gfx_v12_0_gfx_init_queue(struct amdgpu_ring *ring) +static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v12_gfx_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.gfx_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -2958,7 +2958,7 @@ static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v12_0_gfx_init_queue(ring); + r = gfx_v12_0_kgq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -3262,13 +3262,13 @@ static int gfx_v12_0_kiq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v12_compute_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -3332,7 +3332,7 @@ static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v12_0_kcq_init_queue(ring); + r = gfx_v12_0_kcq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -4501,6 +4501,9 @@ static int gfx_v12_0_ring_preempt_ib(struct amdgpu_ring *ring) struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; + if (adev->enable_mes) + return -EINVAL; + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -4617,7 +4620,9 @@ static void gfx_v12_0_ring_soft_recovery(struct amdgpu_ring *ring, value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); WREG32_SOC15(GC, 0, regSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void @@ -5155,6 +5160,93 @@ static void gfx_v12_ip_dump(void *handle) amdgpu_gfx_off_ctrl(adev, true); } +static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); + if (r) { + dev_err(adev->dev, "reset via MES failed %d\n", r); + return r; + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v12_0_kgq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + DRM_ERROR("fail to unresv mqd_obj\n"); + return r; + } + + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kgq\n"); + return r; + } + + return amdgpu_ring_test_ring(ring); +} + +static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int r, i; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + DRM_ERROR("fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v12_0_kcq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + DRM_ERROR("fail to unresv mqd_obj\n"); + return r; + } + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kcq\n"); + return r; + } + + return amdgpu_ring_test_ring(ring); +} + static const struct amd_ip_funcs gfx_v12_0_ip_funcs = { .name = "gfx_v12_0", .early_init = gfx_v12_0_early_init, @@ -5217,6 +5309,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, + .reset = gfx_v12_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { @@ -5251,6 +5344,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, + .reset = gfx_v12_0_reset_kcq, }; static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 21089aadbb7b..8cf5d7925b51 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7233,6 +7233,10 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; + if (!adev->debug_exp_resets && + !adev->gfx.num_gfx_rings) + return -EINVAL; + if (amdgpu_sriov_vf(adev)) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 2067f26d3a9d..408e5600bb61 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3052,6 +3052,9 @@ static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; uint32_t value = 0; + if (!adev->debug_exp_resets) + return; + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); @@ -3466,6 +3469,98 @@ static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static int gfx_v9_4_3_unmap_done(struct amdgpu_device *adev, uint32_t me, + uint32_t pipe, uint32_t queue, + uint32_t xcc_id) +{ + int i, r; + /* make sure dequeue is complete*/ + gfx_v9_4_3_xcc_set_safe_mode(adev, xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, me, pipe, queue, 0, GET_INST(GC, xcc_id)); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + else + r = 0; + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + gfx_v9_4_3_xcc_unset_safe_mode(adev, xcc_id); + + return r; + +} + +static bool gfx_v9_4_3_pipe_reset_support(struct amdgpu_device *adev) +{ + /*TODO: Need check gfx9.4.4 mec fw whether supports pipe reset as well.*/ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && + adev->gfx.mec_fw_version >= 0x0000009b) + return true; + else + dev_warn_once(adev->dev, "Please use the latest MEC version to see whether support pipe reset\n"); + + return false; +} + +static int gfx_v9_4_3_reset_hw_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe, clean_pipe; + int r; + + if (!gfx_v9_4_3_pipe_reset_support(adev)) + return -EINVAL; + + gfx_v9_4_3_xcc_set_safe_mode(adev, ring->xcc_id); + mutex_lock(&adev->srbm_mutex); + + reset_pipe = RREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL); + clean_pipe = reset_pipe; + + if (ring->me == 1) { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 1); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 1); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 1); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 1); + break; + default: + break; + } + } else { + if (ring->pipe) + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 1); + else + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 1); + } + + WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, reset_pipe); + WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, clean_pipe); + mutex_unlock(&adev->srbm_mutex); + gfx_v9_4_3_xcc_unset_safe_mode(adev, ring->xcc_id); + + r = gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id); + return r; +} + static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { @@ -3473,7 +3568,10 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; - int r, i; + int r; + + if (!adev->debug_exp_resets) + return -EINVAL; if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -3495,26 +3593,23 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + dev_err(adev->dev, "kiq ring test failed after ring: %s queue reset\n", + ring->name); + goto pipe_reset; + } + + r = gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id); if (r) - return r; + dev_err(adev->dev, "fail to wait on hqd deactive and will try pipe reset\n"); - /* make sure dequeue is complete*/ - amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id); - mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, ring->xcc_id)); - for (i = 0; i < adev->usec_timeout; i++) { - if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - if (i >= adev->usec_timeout) - r = -ETIMEDOUT; - soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, ring->xcc_id)); - mutex_unlock(&adev->srbm_mutex); - amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id); - if (r) { - dev_err(adev->dev, "fail to wait on hqd deactive\n"); - return r; +pipe_reset: + if(r) { + r = gfx_v9_4_3_reset_hw_pipe(ring); + dev_info(adev->dev, "ring: %s pipe reset :%s\n", ring->name, + r ? "failed" : "successfully"); + if (r) + return r; } r = amdgpu_bo_reserve(ring->mqd_obj, false); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 8edcd85a1261..0f055d1b1da6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "soc15_common.h" #include "soc21.h" +#include "gfx_v11_0.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" #include "gc/gc_11_0_0_default.h" @@ -360,9 +361,83 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, + uint32_t queue_id, uint32_t vmid) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t value; + int i, r = 0; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + + if (queue_type == AMDGPU_RING_TYPE_GFX) { + dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n", + me_id, pipe_id, queue_id, vmid); + + mutex_lock(&adev->gfx.reset_sem_mutex); + gfx_v11_0_request_gfx_index_mutex(adev, true); + /* all se allow writes */ + WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, + (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT)); + value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + if (pipe_id == 0) + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id); + else + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id); + WREG32_SOC15(GC, 0, regCP_VMID_RESET, value); + gfx_v11_0_request_gfx_index_mutex(adev, false); + mutex_unlock(&adev->gfx.reset_sem_mutex); + + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n"); + r = -ETIMEDOUT; + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n", + me_id, pipe_id, queue_id); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on hqd deactivate\n"); + r = -ETIMEDOUT; + } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} + static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, struct mes_reset_queue_input *input) { + if (input->use_mmio) + return mes_v11_0_reset_queue_mmio(mes, input->queue_type, + input->me_id, input->pipe_id, + input->queue_id, input->vmid); + union MESAPI__RESET mes_reset_queue_pkt; memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); @@ -648,6 +723,11 @@ static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, { union MESAPI__RESET mes_reset_queue_pkt; + if (input->use_mmio) + return mes_v11_0_reset_queue_mmio(mes, input->queue_type, + input->me_id, input->pipe_id, + input->queue_id, input->vmid); + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; @@ -775,6 +855,28 @@ static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev, (void **)&adev->mes.ucode_fw_ptr[pipe]); } +static void mes_v11_0_get_fw_version(struct amdgpu_device *adev) +{ + int pipe; + + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + soc21_grbm_select(adev, 3, pipe, 0, 0); + + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = + RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = + RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) { uint64_t ucode_addr; @@ -1144,18 +1246,6 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, mes_v11_0_queue_init_register(ring); } - /* get MES scheduler/KIQ versions */ - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, 3, pipe, 0, 0); - - if (pipe == AMDGPU_MES_SCHED_PIPE) - adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) - adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - return 0; } @@ -1402,15 +1492,24 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) mes_v11_0_enable(adev, true); + mes_v11_0_get_fw_version(adev); + mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring); r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE); if (r) goto failure; - r = mes_v11_0_hw_init(adev); - if (r) - goto failure; + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x47) + adev->mes.enable_legacy_queue_map = true; + else + adev->mes.enable_legacy_queue_map = false; + + if (adev->mes.enable_legacy_queue_map) { + r = mes_v11_0_hw_init(adev); + if (r) + goto failure; + } return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 47a73f6ae4da..e499b2857a01 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1332,6 +1332,7 @@ static int mes_v12_0_sw_init(void *handle) adev->mes.funcs = &mes_v12_0_funcs; adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; + adev->mes.enable_legacy_queue_map = true; adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; @@ -1488,9 +1489,11 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE); } - r = mes_v12_0_hw_init(adev); - if (r) - goto failure; + if (adev->mes.enable_legacy_queue_map) { + r = mes_v12_0_hw_init(adev); + if (r) + goto failure; + } return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index ce2a5d9f90d3..40c94c4cdd96 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2464,11 +2464,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, adev = pdd->dev->adev; /* Check and drain ih1 ring if cam not available */ - ih = &adev->irq.ih1; - checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); - if (ih->rptr != checkpoint_wptr) { - svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); - continue; + if (adev->irq.ih1.ring_size) { + ih = &adev->irq.ih1; + checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); + if (ih->rptr != checkpoint_wptr) { + svms->checkpoint_ts[i] = + amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); + continue; + } } /* check if dev->irq.ih_soft is not empty */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5f7b178ba870..a8d0d1b71723 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1754,6 +1754,30 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device * return bb; } +static enum dmub_ips_disable_type dm_get_default_ips_mode( + struct amdgpu_device *adev) +{ + /* + * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to + * cause a hard hang. A fix exists for newer PMFW. + * + * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest + * IPS state in all cases, except for s0ix and all displays off (DPMS), + * where IPS2 is allowed. + * + * When checking pmfw version, use the major and minor only. + */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(3, 5, 0) && + (adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) + return DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0)) + return DMUB_IPS_ENABLE; + + /* ASICs older than DCN35 do not have IPSs */ + return DMUB_IPS_DISABLE_ALL; +} + static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1864,8 +1888,14 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_debug_mask & DC_DISABLE_IPS) init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; - else + else if (amdgpu_dc_debug_mask & DC_DISABLE_IPS_DYNAMIC) + init_data.flags.disable_ips = DMUB_IPS_DISABLE_DYNAMIC; + else if (amdgpu_dc_debug_mask & DC_DISABLE_IPS2_DYNAMIC) + init_data.flags.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + else if (amdgpu_dc_debug_mask & DC_FORCE_IPS_ENABLE) init_data.flags.disable_ips = DMUB_IPS_ENABLE; + else + init_data.flags.disable_ips = dm_get_default_ips_mode(adev); init_data.flags.disable_ips_in_vpb = 0; @@ -4507,7 +4537,7 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, struct amdgpu_dm_backlight_caps caps; struct dc_link *link; u32 brightness; - bool rc; + bool rc, reallow_idle = false; amdgpu_dm_update_backlight_caps(dm, bl_idx); caps = dm->backlight_caps[bl_idx]; @@ -4520,6 +4550,12 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, link = (struct dc_link *)dm->backlight_link[bl_idx]; /* Change brightness based on AUX property */ + mutex_lock(&dm->dc_lock); + if (dm->dc->caps.ips_support && dm->dc->ctx->dmub_srv->idle_allowed) { + dc_allow_idle_optimizations(dm->dc, false); + reallow_idle = true; + } + if (caps.aux_support) { rc = dc_link_set_backlight_level_nits(link, true, brightness, AUX_BL_DEFAULT_TRANSITION_TIME_MS); @@ -4531,6 +4567,11 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx); } + if (dm->dc->caps.ips_support && reallow_idle) + dc_allow_idle_optimizations(dm->dc, true); + + mutex_unlock(&dm->dc_lock); + if (rc) dm->actual_brightness[bl_idx] = user_brightness; } @@ -6441,7 +6482,8 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, dc_link_get_highest_encoding_format(aconnector->dc_link), &stream->timing.dsc_cfg)) { stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", __func__, drm_connector->name); + DRM_DEBUG_DRIVER("%s: SST_DSC [%s] DSC is selected from SST RX\n", + __func__, drm_connector->name); } } else if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) { timing_bw_in_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing, @@ -6460,7 +6502,7 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, dc_link_get_highest_encoding_format(aconnector->dc_link), &stream->timing.dsc_cfg)) { stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from DP-HDMI PCON\n", + DRM_DEBUG_DRIVER("%s: SST_DSC [%s] DSC is selected from DP-HDMI PCON\n", __func__, drm_connector->name); } } @@ -11637,7 +11679,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, if (dc_resource_is_dsc_encoding_supported(dc)) { ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); if (ret) { - drm_dbg_atomic(dev, "compute_mst_dsc_configs_for_state() failed\n"); + drm_dbg_atomic(dev, "MST_DSC compute_mst_dsc_configs_for_state() failed\n"); ret = -EINVAL; goto fail; } @@ -11658,7 +11700,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, */ ret = drm_dp_mst_atomic_check(state); if (ret) { - drm_dbg_atomic(dev, "drm_dp_mst_atomic_check() failed\n"); + drm_dbg_atomic(dev, "MST drm_dp_mst_atomic_check() failed\n"); goto fail; } status = dc_validate_global_state(dc, dm_state->context, true); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 165e010fe69c..50109d13d967 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -759,7 +759,7 @@ static uint8_t write_dsc_enable_synaptics_non_virtual_dpcd_mst( uint8_t ret = 0; drm_dbg_dp(aux->drm_dev, - "Configure DSC to non-virtual dpcd synaptics\n"); + "MST_DSC Configure DSC to non-virtual dpcd synaptics\n"); if (enable) { /* When DSC is enabled on previous boot and reboot with the hub, @@ -772,7 +772,7 @@ static uint8_t write_dsc_enable_synaptics_non_virtual_dpcd_mst( apply_synaptics_fifo_reset_wa(aux); ret = drm_dp_dpcd_write(aux, DP_DSC_ENABLE, &enable, 1); - DRM_INFO("Send DSC enable to synaptics\n"); + DRM_INFO("MST_DSC Send DSC enable to synaptics\n"); } else { /* Synaptics hub not support virtual dpcd, @@ -781,7 +781,7 @@ static uint8_t write_dsc_enable_synaptics_non_virtual_dpcd_mst( */ if (!stream->link->link_status.link_active) { ret = drm_dp_dpcd_write(aux, DP_DSC_ENABLE, &enable, 1); - DRM_INFO("Send DSC disable to synaptics\n"); + DRM_INFO("MST_DSC Send DSC disable to synaptics\n"); } } @@ -823,14 +823,14 @@ bool dm_helpers_dp_write_dsc_enable( DP_DSC_ENABLE, &enable_passthrough, 1); drm_dbg_dp(dev, - "Sent DSC pass-through enable to virtual dpcd port, ret = %u\n", + "MST_DSC Sent DSC pass-through enable to virtual dpcd port, ret = %u\n", ret); } ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); drm_dbg_dp(dev, - "Sent DSC decoding enable to %s port, ret = %u\n", + "MST_DSC Sent DSC decoding enable to %s port, ret = %u\n", (port->passthrough_aux) ? "remote RX" : "virtual dpcd", ret); @@ -838,7 +838,7 @@ bool dm_helpers_dp_write_dsc_enable( ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); drm_dbg_dp(dev, - "Sent DSC decoding disable to %s port, ret = %u\n", + "MST_DSC Sent DSC decoding disable to %s port, ret = %u\n", (port->passthrough_aux) ? "remote RX" : "virtual dpcd", ret); @@ -848,7 +848,7 @@ bool dm_helpers_dp_write_dsc_enable( DP_DSC_ENABLE, &enable_passthrough, 1); drm_dbg_dp(dev, - "Sent DSC pass-through disable to virtual dpcd port, ret = %u\n", + "MST_DSC Sent DSC pass-through disable to virtual dpcd port, ret = %u\n", ret); } } @@ -858,12 +858,12 @@ bool dm_helpers_dp_write_dsc_enable( if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE) { ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1); drm_dbg_dp(dev, - "Send DSC %s to SST RX\n", + "SST_DSC Send DSC %s to SST RX\n", enable_dsc ? "enable" : "disable"); } else if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) { ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1); drm_dbg_dp(dev, - "Send DSC %s to DP-HDMI PCON\n", + "SST_DSC Send DSC %s to DP-HDMI PCON\n", enable_dsc ? "enable" : "disable"); } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 2e9f6da1acdc..6b5eed37532b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -253,7 +253,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto aconnector->dsc_aux = &aconnector->mst_root->dm_dp_aux.aux; /* synaptics cascaded MST hub case */ - if (!aconnector->dsc_aux && is_synaptics_cascaded_panamera(aconnector->dc_link, port)) + if (is_synaptics_cascaded_panamera(aconnector->dc_link, port)) aconnector->dsc_aux = port->mgr->aux; if (!aconnector->dsc_aux) @@ -578,6 +578,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, if (!aconnector) return NULL; + DRM_DEBUG_DRIVER("%s: Create aconnector 0x%p for port 0x%p\n", __func__, aconnector, port); + connector = &aconnector->base; aconnector->mst_output_port = port; aconnector->mst_root = master; @@ -872,11 +874,11 @@ static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *p if (params[i].sink) { if (params[i].sink->sink_signal != SIGNAL_TYPE_VIRTUAL && params[i].sink->sink_signal != SIGNAL_TYPE_NONE) - DRM_DEBUG_DRIVER("%s i=%d dispname=%s\n", __func__, i, + DRM_DEBUG_DRIVER("MST_DSC %s i=%d dispname=%s\n", __func__, i, params[i].sink->edid_caps.display_name); } - DRM_DEBUG_DRIVER("dsc=%d bits_per_pixel=%d pbn=%d\n", + DRM_DEBUG_DRIVER("MST_DSC dsc=%d bits_per_pixel=%d pbn=%d\n", params[i].timing->flags.DSC, params[i].timing->dsc_cfg.bits_per_pixel, vars[i + k].pbn); @@ -1054,6 +1056,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; + DRM_DEBUG_DRIVER("MST_DSC index #%d, try no compression\n", next_index); vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, @@ -1064,10 +1067,12 @@ static int try_disable_dsc(struct drm_atomic_state *state, ret = drm_dp_mst_atomic_check(state); if (ret == 0) { + DRM_DEBUG_DRIVER("MST_DSC index #%d, greedily disable dsc\n", next_index); vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); + DRM_DEBUG_DRIVER("MST_DSC index #%d, restore minimum compression\n", next_index); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1082,6 +1087,15 @@ static int try_disable_dsc(struct drm_atomic_state *state, return 0; } +static void log_dsc_params(int count, struct dsc_mst_fairness_vars *vars, int k) +{ + int i; + + for (i = 0; i < count; i++) + DRM_DEBUG_DRIVER("MST_DSC DSC params: stream #%d --- dsc_enabled = %d, bpp_x16 = %d, pbn = %d\n", + i, vars[i + k].dsc_enabled, vars[i + k].bpp_x16, vars[i + k].pbn); +} + static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, struct dc_state *dc_state, struct dc_link *dc_link, @@ -1104,6 +1118,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, return PTR_ERR(mst_state); /* Set up params */ + DRM_DEBUG_DRIVER("%s: MST_DSC Set up params for %d streams\n", __func__, dc_state->stream_count); for (i = 0; i < dc_state->stream_count; i++) { struct dc_dsc_policy dsc_policy = {0}; @@ -1145,6 +1160,9 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, params[count].bw_range.stream_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing, dc_link_get_highest_encoding_format(dc_link)); + DRM_DEBUG_DRIVER("MST_DSC #%d stream 0x%p - max_kbps = %u, min_kbps = %u, uncompressed_kbps = %u\n", + count, stream, params[count].bw_range.max_kbps, params[count].bw_range.min_kbps, + params[count].bw_range.stream_kbps); count++; } @@ -1159,6 +1177,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, *link_vars_start_index += count; /* Try no compression */ + DRM_DEBUG_DRIVER("MST_DSC Try no compression\n"); for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); @@ -1177,7 +1196,10 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, return ret; } + log_dsc_params(count, vars, k); + /* Try max compression */ + DRM_DEBUG_DRIVER("MST_DSC Try max compression\n"); for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); @@ -1201,14 +1223,26 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret != 0) return ret; + log_dsc_params(count, vars, k); + /* Optimize degree of compression */ + DRM_DEBUG_DRIVER("MST_DSC Try optimize compression\n"); ret = increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k); - if (ret < 0) + if (ret < 0) { + DRM_DEBUG_DRIVER("MST_DSC Failed to optimize compression\n"); return ret; + } + log_dsc_params(count, vars, k); + + DRM_DEBUG_DRIVER("MST_DSC Try disable compression\n"); ret = try_disable_dsc(state, dc_link, params, vars, count, k); - if (ret < 0) + if (ret < 0) { + DRM_DEBUG_DRIVER("MST_DSC Failed to disable compression\n"); return ret; + } + + log_dsc_params(count, vars, k); set_dsc_configs_from_fairness_vars(params, vars, count, k); @@ -1230,17 +1264,19 @@ static bool is_dsc_need_re_compute( /* only check phy used by dsc mst branch */ if (dc_link->type != dc_connection_mst_branch) - return false; + goto out; /* add a check for older MST DSC with no virtual DPCDs */ if (needs_dsc_aux_workaround(dc_link) && (!(dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT || dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_PASSTHROUGH_SUPPORT))) - return false; + goto out; for (i = 0; i < MAX_PIPES; i++) stream_on_link[i] = NULL; + DRM_DEBUG_DRIVER("%s: MST_DSC check on %d streams in new dc_state\n", __func__, dc_state->stream_count); + /* check if there is mode change in new request */ for (i = 0; i < dc_state->stream_count; i++) { struct drm_crtc_state *new_crtc_state; @@ -1250,6 +1286,8 @@ static bool is_dsc_need_re_compute( if (!stream) continue; + DRM_DEBUG_DRIVER("%s:%d MST_DSC checking #%d stream 0x%p\n", __func__, __LINE__, i, stream); + /* check if stream using the same link for mst */ if (stream->link != dc_link) continue; @@ -1262,8 +1300,11 @@ static bool is_dsc_need_re_compute( new_stream_on_link_num++; new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base); - if (!new_conn_state) + if (!new_conn_state) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC no new_conn_state for stream 0x%p, aconnector 0x%p\n", + __func__, __LINE__, stream, aconnector); continue; + } if (IS_ERR(new_conn_state)) continue; @@ -1272,21 +1313,36 @@ static bool is_dsc_need_re_compute( continue; new_crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc); - if (!new_crtc_state) + if (!new_crtc_state) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC no new_crtc_state for crtc of stream 0x%p, aconnector 0x%p\n", + __func__, __LINE__, stream, aconnector); continue; + } if (IS_ERR(new_crtc_state)) continue; if (new_crtc_state->enable && new_crtc_state->active) { if (new_crtc_state->mode_changed || new_crtc_state->active_changed || - new_crtc_state->connectors_changed) - return true; + new_crtc_state->connectors_changed) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompte required." + "stream 0x%p in new dc_state\n", + __func__, __LINE__, stream); + is_dsc_need_re_compute = true; + goto out; + } } } - if (new_stream_on_link_num == 0) - return false; + if (new_stream_on_link_num == 0) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC no mode change request for streams in new dc_state\n", + __func__, __LINE__); + is_dsc_need_re_compute = false; + goto out; + } + + DRM_DEBUG_DRIVER("%s: MST_DSC check on %d streams in current dc_state\n", + __func__, dc->current_state->stream_count); /* check current_state if there stream on link but it is not in * new request state @@ -1310,11 +1366,18 @@ static bool is_dsc_need_re_compute( if (j == new_stream_on_link_num) { /* not in new state */ + DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompute required." + "stream 0x%p in current dc_state but not in new dc_state\n", + __func__, __LINE__, stream); is_dsc_need_re_compute = true; break; } } +out: + DRM_DEBUG_DRIVER("%s: MST_DSC dsc recompute %s\n", + __func__, is_dsc_need_re_compute ? "required" : "not required"); + return is_dsc_need_re_compute; } @@ -1343,6 +1406,9 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + DRM_DEBUG_DRIVER("%s: MST_DSC compute mst dsc configs for stream 0x%p, aconnector 0x%p\n", + __func__, stream, aconnector); + if (!aconnector || !aconnector->dc_sink || !aconnector->mst_output_port) continue; @@ -1375,8 +1441,11 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, stream = dc_state->streams[i]; if (stream->timing.flags.DSC == 1) - if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK) + if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC Failed to request dsc hw resource for stream 0x%p\n", + __func__, __LINE__, stream); return -EINVAL; + } } return ret; @@ -1405,6 +1474,9 @@ static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + DRM_DEBUG_DRIVER("MST_DSC pre compute mst dsc configs for #%d stream 0x%p, aconnector 0x%p\n", + i, stream, aconnector); + if (!aconnector || !aconnector->dc_sink || !aconnector->mst_output_port) continue; @@ -1494,12 +1566,12 @@ int pre_validate_dsc(struct drm_atomic_state *state, int ret = 0; if (!is_dsc_precompute_needed(state)) { - DRM_INFO_ONCE("DSC precompute is not needed.\n"); + DRM_INFO_ONCE("%s:%d MST_DSC dsc precompute is not needed\n", __func__, __LINE__); return 0; } ret = dm_atomic_get_state(state, dm_state_ptr); if (ret != 0) { - DRM_INFO_ONCE("dm_atomic_get_state() failed\n"); + DRM_INFO_ONCE("%s:%d MST_DSC dm_atomic_get_state() failed\n", __func__, __LINE__); return ret; } dm_state = *dm_state_ptr; @@ -1553,7 +1625,8 @@ int pre_validate_dsc(struct drm_atomic_state *state, ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars); if (ret != 0) { - DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n"); + DRM_INFO_ONCE("%s:%d MST_DSC dsc pre_compute_mst_dsc_configs_for_state() failed\n", + __func__, __LINE__); ret = -EINVAL; goto clean_exit; } @@ -1567,12 +1640,15 @@ int pre_validate_dsc(struct drm_atomic_state *state, if (local_dc_state->streams[i] && dc_is_timing_changed(stream, local_dc_state->streams[i])) { - DRM_INFO_ONCE("crtc[%d] needs mode_changed\n", i); + DRM_INFO_ONCE("%s:%d MST_DSC crtc[%d] needs mode_change\n", __func__, __LINE__, i); } else { int ind = find_crtc_index_in_state_by_stream(state, stream); - if (ind >= 0) + if (ind >= 0) { + DRM_INFO_ONCE("%s:%d MST_DSC no mode changed for stream 0x%p\n", + __func__, __LINE__, stream); state->crtcs[ind].new_state->mode_changed = 0; + } } } clean_exit: @@ -1697,7 +1773,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( end_to_end_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps); if (stream_kbps <= end_to_end_bw_in_kbps) { - DRM_DEBUG_DRIVER("No DSC needed. End-to-end bw sufficient."); + DRM_DEBUG_DRIVER("MST_DSC no dsc required. End-to-end bw sufficient\n"); return DC_OK; } @@ -1710,7 +1786,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( /*capable of dsc passthough. dsc bitstream along the entire path*/ if (aconnector->mst_output_port->passthrough_aux) { if (bw_range.min_kbps > end_to_end_bw_in_kbps) { - DRM_DEBUG_DRIVER("DSC passthrough. Max dsc compression can't fit into end-to-end bw\n"); + DRM_DEBUG_DRIVER("MST_DSC dsc passthrough and decode at endpoint" + "Max dsc compression bw can't fit into end-to-end bw\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } } else { @@ -1721,7 +1798,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( /*Get last DP link BW capability*/ if (dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw)) { if (stream_kbps > end_link_bw) { - DRM_DEBUG_DRIVER("DSC decode at last link. Mode required bw can't fit into available bw\n"); + DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." + "Mode required bw can't fit into last link\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } } @@ -1734,7 +1812,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( virtual_channel_bw_in_kbps = kbps_from_pbn(immediate_upstream_port->full_pbn); virtual_channel_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps); if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { - DRM_DEBUG_DRIVER("DSC decode at last link. Max dsc compression can't fit into MST available bw\n"); + DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." + "Max dsc compression can't fit into MST available bw\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } } @@ -1751,9 +1830,9 @@ enum dc_status dm_dp_mst_is_port_support_mode( dc_link_get_highest_encoding_format(stream->link), &stream->timing.dsc_cfg)) { stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("Require dsc and dsc config found\n"); + DRM_DEBUG_DRIVER("MST_DSC require dsc and dsc config found\n"); } else { - DRM_DEBUG_DRIVER("Require dsc but can't find appropriate dsc config\n"); + DRM_DEBUG_DRIVER("MST_DSC require dsc but can't find appropriate dsc config\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } @@ -1775,11 +1854,11 @@ enum dc_status dm_dp_mst_is_port_support_mode( if (branch_max_throughput_mps != 0 && ((stream->timing.pix_clk_100hz / 10) > branch_max_throughput_mps * 1000)) { - DRM_DEBUG_DRIVER("DSC is required but max throughput mps fails"); + DRM_DEBUG_DRIVER("MST_DSC require dsc but max throughput mps fails\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } } else { - DRM_DEBUG_DRIVER("DSC is required but can't find common dsc config."); + DRM_DEBUG_DRIVER("MST_DSC require dsc but can't find common dsc config\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index a573a6639898..25f63b2e7a8e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1283,6 +1283,7 @@ int amdgpu_dm_plane_get_cursor_position(struct drm_plane *plane, struct drm_crtc struct dc_cursor_position *position) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + struct amdgpu_device *adev = drm_to_adev(plane->dev); int x, y; int xorigin = 0, yorigin = 0; @@ -1314,12 +1315,14 @@ int amdgpu_dm_plane_get_cursor_position(struct drm_plane *plane, struct drm_crtc y = 0; } position->enable = true; - position->translate_by_source = true; position->x = x; position->y = y; position->x_hotspot = xorigin; position->y_hotspot = yorigin; + if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(4, 0, 1)) + position->translate_by_source = true; + return 0; } diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 4254bdfefe38..7d18f372ce7a 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -227,7 +227,7 @@ static void init_transmitter_control(struct bios_parser *bp) uint8_t frev; uint8_t crev = 0; - if (!BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev)) + if (!BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) && (bp->base.ctx->dc->ctx->dce_version <= DCN_VERSION_2_0)) BREAK_TO_DEBUGGER(); switch (crev) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 0ce9b40dfc68..97164b5585a8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -305,9 +305,6 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz) new_clocks->ref_dtbclk_khz = 600000; - if (dc->debug.min_disp_clk_khz > 0 && new_clocks->dispclk_khz < dc->debug.min_disp_clk_khz) - new_clocks->dispclk_khz = dc->debug.min_disp_clk_khz; - /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything * also if safe to lower is false, we just go in the higher state @@ -385,6 +382,9 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); + if (dc->debug.min_disp_clk_khz > 0 && new_clocks->dispclk_khz < dc->debug.min_disp_clk_khz) + new_clocks->dispclk_khz = dc->debug.min_disp_clk_khz; + clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); @@ -1100,7 +1100,7 @@ void dcn35_clk_mgr_construct( clk_mgr->smu_wm_set.wm_set = (struct dcn35_watermarks *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, - DC_MEM_ALLOC_TYPE_FRAME_BUFFER, + DC_MEM_ALLOC_TYPE_GART, sizeof(struct dcn35_watermarks), &clk_mgr->smu_wm_set.mc_address.quad_part); @@ -1112,7 +1112,7 @@ void dcn35_clk_mgr_construct( smu_dpm_clks.dpm_clks = (DpmClocks_t_dcn35 *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, - DC_MEM_ALLOC_TYPE_FRAME_BUFFER, + DC_MEM_ALLOC_TYPE_GART, sizeof(DpmClocks_t_dcn35), &smu_dpm_clks.mc_address.quad_part); @@ -1209,7 +1209,7 @@ void dcn35_clk_mgr_construct( } if (smu_dpm_clks.dpm_clks && smu_dpm_clks.mc_address.quad_part != 0) - dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER, + dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART, smu_dpm_clks.dpm_clks); if (ctx->dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 01ea3a31e54d..8cfc5f435937 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -1366,9 +1366,6 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, unsigned int num_steps = 0; - if (dc->work_arounds.skip_clock_update) - return; - if (dc->debug.enable_legacy_clock_update) { dcn401_update_clocks_legacy(clk_mgr_base, context, safe_to_lower); return; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c8dabb081b3d..ae788154896c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2690,6 +2690,9 @@ static enum surface_update_type check_update_surfaces_for_stream( stream_update->vrr_active_variable || stream_update->vrr_active_fixed)) su_flags->bits.fams_changed = 1; + if (stream_update->scaler_sharpener_update) + su_flags->bits.scaler_sharpener = 1; + if (su_flags->raw != 0) overall_type = UPDATE_TYPE_FULL; @@ -3022,6 +3025,8 @@ static void copy_stream_update_to_stream(struct dc *dc, update->dsc_config = NULL; } } + if (update->scaler_sharpener_update) + stream->scaler_sharpener_update = *update->scaler_sharpener_update; } static void backup_planes_and_stream_state( @@ -4713,7 +4718,8 @@ static bool full_update_required(struct dc *dc, stream_update->func_shaper || stream_update->lut3d_func || stream_update->pending_test_pattern || - stream_update->crtc_timing_adjust)) + stream_update->crtc_timing_adjust || + stream_update->scaler_sharpener_update)) return true; if (stream) { @@ -5161,6 +5167,8 @@ void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state) dc_z10_restore(dc); + dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state); + dc->hwss.init_hw(dc); if (dc->hwss.init_sys_ctx != NULL && @@ -5172,6 +5180,8 @@ void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state) default: ASSERT(dc->current_state->stream_count == 0); + dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state); + dc_state_destruct(dc->current_state); break; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index ef585a89847b..c7599c40d4be 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1506,8 +1506,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha; - spl_out->scl_data.h_active = pipe_ctx->plane_res.scl_data.h_active; - spl_out->scl_data.v_active = pipe_ctx->plane_res.scl_data.v_active; // Convert pipe_ctx to respective input params for SPL translate_SPL_in_params_from_pipe_ctx(pipe_ctx, spl_in); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 5bbc7d2daca6..4c94dd38be4b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.297" +#define DC_VER "3.2.299" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -761,7 +761,8 @@ union dpia_debug_options { uint32_t extend_aux_rd_interval:1; /* bit 2 */ uint32_t disable_mst_dsc_work_around:1; /* bit 3 */ uint32_t enable_force_tbt3_work_around:1; /* bit 4 */ - uint32_t reserved:27; + uint32_t disable_usb4_pm_support:1; /* bit 5 */ + uint32_t reserved:26; } bits; uint32_t raw; }; @@ -1051,6 +1052,7 @@ struct dc_debug_options { unsigned int disable_spl; unsigned int force_easf; unsigned int force_sharpness; + unsigned int force_sharpness_level; unsigned int force_lls; bool notify_dpia_hr_bw; bool enable_ips_visual_confirm; @@ -1347,7 +1349,7 @@ struct dc_plane_state { enum mpcc_movable_cm_location mcm_location; struct dc_csc_transform cursor_csc_color_matrix; bool adaptive_sharpness_en; - unsigned int sharpnessX1000; + int sharpness_level; enum linear_light_scaling linear_light_scaling; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index b1265124608b..1e7de0f03290 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1476,7 +1476,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ips2_exit_count); } -void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState) +void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state power_state) { struct dmub_srv *dmub; @@ -1485,12 +1485,38 @@ void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_c dmub = dc_dmub_srv->dmub; - if (powerState == DC_ACPI_CM_POWER_STATE_D0) + if (power_state == DC_ACPI_CM_POWER_STATE_D0) dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D0); else dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3); } +void dc_dmub_srv_notify_fw_dc_power_state(struct dc_dmub_srv *dc_dmub_srv, + enum dc_acpi_cm_power_state power_state) +{ + union dmub_rb_cmd cmd; + + if (!dc_dmub_srv) + return; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.idle_opt_set_dc_power_state.header.type = DMUB_CMD__IDLE_OPT; + cmd.idle_opt_set_dc_power_state.header.sub_type = DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE; + cmd.idle_opt_set_dc_power_state.header.payload_bytes = + sizeof(cmd.idle_opt_set_dc_power_state) - sizeof(cmd.idle_opt_set_dc_power_state.header); + + if (power_state == DC_ACPI_CM_POWER_STATE_D0) { + cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_D0; + } else if (power_state == DC_ACPI_CM_POWER_STATE_D3) { + cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_D3; + } else { + cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_UNKNOWN; + } + + dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} + bool dc_dmub_srv_should_detect(struct dc_dmub_srv *dc_dmub_srv) { volatile const struct dmub_shared_state_ips_fw *ips_fw; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 580940222777..42f0cb672d8b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -109,7 +109,29 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait); void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle); -void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState); +/** + * dc_dmub_srv_set_power_state() - Sets the power state for DMUB service. + * + * Controls whether messaging the DMCUB or interfacing with it via HW register + * interaction is permittable. + * + * @dc_dmub_srv - The DC DMUB service pointer + * @power_state - the DC power state + */ +void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state power_state); + +/** + * dc_dmub_srv_notify_fw_dc_power_state() - Notifies firmware of the DC power state. + * + * Differs from dc_dmub_srv_set_power_state in that it needs to access HW in order + * to message DMCUB of the state transition. Should come after the D0 exit and + * before D3 set power state. + * + * @dc_dmub_srv - The DC DMUB service pointer + * @power_state - the DC power state + */ +void dc_dmub_srv_notify_fw_dc_power_state(struct dc_dmub_srv *dc_dmub_srv, + enum dc_acpi_cm_power_state power_state); /** * @dc_dmub_srv_should_detect() - Checks if link detection is required. diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 8f85a1db5eba..cd6de93eb91c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -42,26 +42,26 @@ static void populate_spltaps_from_taps(struct spl_taps *spl_scaling_quality, static void populate_taps_from_spltaps(struct scaling_taps *scaling_quality, const struct spl_taps *spl_scaling_quality) { - scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c; - scaling_quality->h_taps = spl_scaling_quality->h_taps; - scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c; - scaling_quality->v_taps = spl_scaling_quality->v_taps; + scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c + 1; + scaling_quality->h_taps = spl_scaling_quality->h_taps + 1; + scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c + 1; + scaling_quality->v_taps = spl_scaling_quality->v_taps + 1; } static void populate_ratios_from_splratios(struct scaling_ratios *ratios, - const struct spl_ratios *spl_ratios) + const struct ratio *spl_ratios) { - ratios->horz = spl_ratios->horz; - ratios->vert = spl_ratios->vert; - ratios->horz_c = spl_ratios->horz_c; - ratios->vert_c = spl_ratios->vert_c; + ratios->horz = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio >> 5, 3, 19); + ratios->vert = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio >> 5, 3, 19); + ratios->horz_c = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio_c >> 5, 3, 19); + ratios->vert_c = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio_c >> 5, 3, 19); } static void populate_inits_from_splinits(struct scl_inits *inits, - const struct spl_inits *spl_inits) + const struct init *spl_inits) { - inits->h = spl_inits->h; - inits->v = spl_inits->v; - inits->h_c = spl_inits->h_c; - inits->v_c = spl_inits->v_c; + inits->h = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int, spl_inits->h_filter_init_frac >> 5, 0, 19); + inits->v = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int, spl_inits->v_filter_init_frac >> 5, 0, 19); + inits->h_c = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int_c, spl_inits->h_filter_init_frac_c >> 5, 0, 19); + inits->v_c = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int_c, spl_inits->v_filter_init_frac_c >> 5, 0, 19); } /// @brief Translate SPL input parameters from pipe context /// @param pipe_ctx @@ -139,24 +139,36 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl else if (pipe_ctx->stream->ctx->dc->debug.force_easf == 2) spl_in->disable_easf = true; /* Translate adaptive sharpening preference */ - if (pipe_ctx->stream->ctx->dc->debug.force_sharpness > 0) { - spl_in->adaptive_sharpness.enable = (pipe_ctx->stream->ctx->dc->debug.force_sharpness > 1) ? true : false; - if (pipe_ctx->stream->ctx->dc->debug.force_sharpness == 2) - spl_in->adaptive_sharpness.sharpness = SHARPNESS_LOW; - else if (pipe_ctx->stream->ctx->dc->debug.force_sharpness == 3) - spl_in->adaptive_sharpness.sharpness = SHARPNESS_MID; - else if (pipe_ctx->stream->ctx->dc->debug.force_sharpness >= 4) - spl_in->adaptive_sharpness.sharpness = SHARPNESS_HIGH; - } else { - spl_in->adaptive_sharpness.enable = plane_state->adaptive_sharpness_en; - if (plane_state->sharpnessX1000 == 0) + unsigned int sharpness_setting = pipe_ctx->stream->ctx->dc->debug.force_sharpness; + unsigned int force_sharpness_level = pipe_ctx->stream->ctx->dc->debug.force_sharpness_level; + if (sharpness_setting == SHARPNESS_HW_OFF) + spl_in->adaptive_sharpness.enable = false; + else if (sharpness_setting == SHARPNESS_ZERO) { + spl_in->adaptive_sharpness.enable = true; + spl_in->adaptive_sharpness.sharpness_level = 0; + } else if (sharpness_setting == SHARPNESS_CUSTOM) { + spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_min = 0; + spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_max = 1750; + spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_mid = 750; + spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_min = 0; + spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_max = 3500; + spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_mid = 1500; + spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_min = 0; + spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_max = 2750; + spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_mid = 1500; + + if (force_sharpness_level > 0) { + if (force_sharpness_level > 10) + force_sharpness_level = 10; + spl_in->adaptive_sharpness.enable = true; + spl_in->adaptive_sharpness.sharpness_level = force_sharpness_level; + } else if (!plane_state->adaptive_sharpness_en) { spl_in->adaptive_sharpness.enable = false; - else if (plane_state->sharpnessX1000 < 999) - spl_in->adaptive_sharpness.sharpness = SHARPNESS_LOW; - else if (plane_state->sharpnessX1000 < 1999) - spl_in->adaptive_sharpness.sharpness = SHARPNESS_MID; - else // Any other value is high sharpness - spl_in->adaptive_sharpness.sharpness = SHARPNESS_HIGH; + spl_in->adaptive_sharpness.sharpness_level = 0; + } else { + spl_in->adaptive_sharpness.enable = true; + spl_in->adaptive_sharpness.sharpness_level = plane_state->sharpness_level; + } } // Translate linear light scaling preference if (pipe_ctx->stream->ctx->dc->debug.force_lls > 0) @@ -171,6 +183,22 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl /* Translate transfer function */ spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type; spl_in->basic_in.tf_predefined_type = (enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf; + + spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active; + spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active; + /* Check if it is stream is in fullscreen and if its HDR. + * Use this to determine sharpness levels + */ + spl_in->is_fullscreen = dm_helpers_is_fullscreen(pipe_ctx->stream->ctx, pipe_ctx->stream); + spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream); + spl_in->hdr_multx100 = 0; + if (spl_in->is_hdr_on) { + spl_in->hdr_multx100 = (uint32_t)dc_fixpt_floor(dc_fixpt_mul(plane_state->hdr_mult, + dc_fixpt_from_int(100))); + /* Disable sharpness for HDR Mult > 6.0 */ + if (spl_in->hdr_multx100 > 600) + spl_in->adaptive_sharpness.enable = false; + } } /// @brief Translate SPL output parameters to pipe context @@ -179,15 +207,15 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out) { // Make scaler data recout point to spl output field recout - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->scl_data.recout); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->dscl_prog_data->recout); // Make scaler data ratios point to spl output field ratios - populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->scl_data.ratios); + populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->dscl_prog_data->ratios); // Make scaler data viewport point to spl output field viewport - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->scl_data.viewport); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->dscl_prog_data->viewport); // Make scaler data viewport_c point to spl output field viewport_c - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->scl_data.viewport_c); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->dscl_prog_data->viewport_c); // Make scaler data taps point to spl output field scaling taps - populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->scl_data.taps); + populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->dscl_prog_data->taps); // Make scaler data init point to spl output field init - populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->scl_data.inits); + populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->dscl_prog_data->init); } diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h index c73d640c3632..eaa5c5373b28 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h @@ -6,6 +6,7 @@ #define __DC_SPL_TRANSLATE_H__ #include "dc.h" #include "resource.h" +#include "dm_helpers.h" /* Map SPL input parameters to pipe context * @pipe_ctx: pipe context diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index de9bd72ca514..14ea47eda0c8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -142,6 +142,7 @@ union stream_update_flags { uint32_t mst_bw : 1; uint32_t crtc_timing_adjust : 1; uint32_t fams_changed : 1; + uint32_t scaler_sharpener : 1; } bits; uint32_t raw; @@ -308,6 +309,7 @@ struct dc_stream_state { bool is_phantom; struct luminance_data lumin_data; + bool scaler_sharpener_update; }; #define ABM_LEVEL_IMMEDIATE_DISABLE 255 @@ -353,6 +355,7 @@ struct dc_stream_update { struct dc_cursor_attributes *cursor_attributes; struct dc_cursor_position *cursor_position; bool *hw_cursor_req; + bool *scaler_sharpener_update; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 97279b080f3e..fd6dca735714 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1050,6 +1050,23 @@ union replay_error_status { unsigned char raw; }; +union replay_low_refresh_rate_enable_options { + struct { + //BIT[0-3]: Replay Low Hz Support control + unsigned int ENABLE_LOW_RR_SUPPORT :1; + unsigned int RESERVED_1_3 :3; + //BIT[4-15]: Replay Low Hz Enable Scenarios + unsigned int ENABLE_STATIC_SCREEN :1; + unsigned int ENABLE_FULL_SCREEN_VIDEO :1; + unsigned int ENABLE_GENERAL_UI :1; + unsigned int RESERVED_7_15 :9; + //BIT[16-31]: Replay Low Hz Enable Check + unsigned int ENABLE_STATIC_FLICKER_CHECK :1; + unsigned int RESERVED_17_31 :15; + } bits; + unsigned int raw; +}; + struct replay_config { /* Replay feature is supported */ bool replay_supported; @@ -1073,6 +1090,8 @@ struct replay_config { bool replay_support_fast_resync_in_ultra_sleep_mode; /* Replay error status */ union replay_error_status replay_error_status; + /* Replay Low Hz enable Options */ + union replay_low_refresh_rate_enable_options low_rr_enable_options; }; /* Replay feature flags*/ diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h index 6ac2bd86c4db..160c299419b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h @@ -328,6 +328,17 @@ type DPSTREAMCLK1_GATE_DISABLE;\ type DPSTREAMCLK2_GATE_DISABLE;\ type DPSTREAMCLK3_GATE_DISABLE;\ + type SYMCLKA_FE_GATE_DISABLE;\ + type SYMCLKB_FE_GATE_DISABLE;\ + type SYMCLKC_FE_GATE_DISABLE;\ + type SYMCLKD_FE_GATE_DISABLE;\ + type SYMCLKE_FE_GATE_DISABLE;\ + type SYMCLKA_GATE_DISABLE;\ + type SYMCLKB_GATE_DISABLE;\ + type SYMCLKC_GATE_DISABLE;\ + type SYMCLKD_GATE_DISABLE;\ + type SYMCLKE_GATE_DISABLE;\ + #define DCCG401_REG_FIELD_LIST(type) \ type OTG0_TMDS_PIXEL_RATE_DIV;\ diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 7f91e48902e2..ee02b78e290f 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -24,6 +24,7 @@ #include "reg_helper.h" #include "core_types.h" +#include "resource.h" #include "dcn35_dccg.h" #define TO_DCN_DCCG(dccg)\ @@ -136,7 +137,7 @@ static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool enable) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc && enable) return; switch (inst) { @@ -165,7 +166,7 @@ static void dccg35_set_symclk32_se_rcg( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se && enable) return; /* SYMCLK32_ROOT_SE#_GATE_DISABLE will clock gate in DCCG */ @@ -204,7 +205,7 @@ static void dccg35_set_symclk32_le_rcg( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le && enable) return; switch (inst) { @@ -231,7 +232,7 @@ static void dccg35_set_physymclk_rcg( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk && enable) return; switch (inst) { @@ -262,35 +263,45 @@ static void dccg35_set_physymclk_rcg( } static void dccg35_set_symclk_fe_rcg( - struct dccg *dccg, - int inst, - bool enable) + struct dccg *dccg, + int inst, + bool enable) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk_fe && enable) return; switch (inst) { case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKA_FE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, - SYMCLKA_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + SYMCLKA_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKB_FE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, - SYMCLKB_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + SYMCLKB_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKC_FE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, - SYMCLKC_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + SYMCLKC_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKD_FE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, - SYMCLKD_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + SYMCLKD_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKE_FE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, - SYMCLKE_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + SYMCLKE_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; default: BREAK_TO_DEBUGGER(); @@ -307,27 +318,37 @@ static void dccg35_set_symclk_be_rcg( struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); /* TBD add symclk_be in rcg control bits */ - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk_fe && enable) return; switch (inst) { case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKA_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKB_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKC_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKD_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; @@ -342,7 +363,7 @@ static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable) struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && enable) return; switch (inst) { @@ -370,7 +391,7 @@ static void dccg35_set_dppclk_rcg(struct dccg *dccg, struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && enable) return; switch (inst) { @@ -399,7 +420,7 @@ static void dccg35_set_dpstreamclk_rcg( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream && enable) return; switch (inst) { @@ -436,7 +457,7 @@ static void dccg35_set_smclk32_se_rcg( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se && enable) return; switch (inst) { @@ -1082,7 +1103,8 @@ static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) uint32_t dispclk_rdivider_value = 0; REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, &dispclk_rdivider_value); - REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_rdivider_value); + if (dispclk_rdivider_value != 0) + REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_rdivider_value); } static void dcn35_set_dppclk_enable(struct dccg *dccg, @@ -1692,6 +1714,12 @@ static void dccg35_disable_symclk32_se( } } +static void dccg35_init_cb(struct dccg *dccg) +{ + (void)dccg; + /* Any RCG should be done when driver enter low power mode*/ +} + void dccg35_init(struct dccg *dccg) { int otg_inst; @@ -2042,8 +2070,6 @@ static void dccg35_set_dpstreamclk_cb( enum dtbclk_source dtb_clk_src; enum dp_stream_clk_source dp_stream_clk_src; - ASSERT(otg_inst >= DP_STREAM_DTBCLK_P5); - switch (src) { case REFCLK: dtb_clk_src = DTBCLK_REFCLK; @@ -2098,6 +2124,13 @@ static void dccg35_update_dpp_dto_cb(struct dccg *dccg, int dpp_inst, { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + if (dccg->dpp_clock_gated[dpp_inst]) { + /* + * Do not update the DPPCLK DTO if the clock is stopped. + */ + return; + } + if (dccg->ref_dppclk && req_dppclk) { int ref_dppclk = dccg->ref_dppclk; int modulo, phase; @@ -2125,19 +2158,20 @@ static void dccg35_update_dpp_dto_cb(struct dccg *dccg, int dpp_inst, } static void dccg35_dpp_root_clock_control_cb( - struct dccg *dccg, - unsigned int dpp_inst, - bool power_on) + struct dccg *dccg, + unsigned int dpp_inst, + bool power_on) { + if (dccg->dpp_clock_gated[dpp_inst] == power_on) + return; /* power_on set indicates we need to ungate * Currently called from optimize_bandwidth and prepare_bandwidth calls * Since clock source is not passed restore to refclock on ungate * Redundant as gating when enabled is acheived through update_dpp_dto */ - if (power_on) - dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_REFCLK); - else - dccg35_disable_dpp_clk_new(dccg, dpp_inst); + dccg35_set_dppclk_rcg(dccg, dpp_inst, !power_on); + + dccg->dpp_clock_gated[dpp_inst] = !power_on; } static void dccg35_enable_symclk32_se_cb( @@ -2321,7 +2355,7 @@ static const struct dccg_funcs dccg35_funcs_new = { .update_dpp_dto = dccg35_update_dpp_dto_cb, .dpp_root_clock_control = dccg35_dpp_root_clock_control_cb, .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, - .dccg_init = dccg35_init, + .dccg_init = dccg35_init_cb, .set_dpstreamclk = dccg35_set_dpstreamclk_cb, .set_dpstreamclk_root_clock_gating = dccg35_set_dpstreamclk_root_clock_gating_cb, .enable_symclk32_se = dccg35_enable_symclk32_se_cb, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 14f935961672..c31e4f26a305 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -12,6 +12,8 @@ #define MAX_PIPES 6 +#define GPINT_RETRY_NUM 20 + static const uint8_t DP_SINK_DEVICE_STR_ID_1[] = {7, 1, 8, 7, 3}; static const uint8_t DP_SINK_DEVICE_STR_ID_2[] = {7, 1, 8, 7, 5}; @@ -222,6 +224,7 @@ static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst, uint32_t *residency, const bool is_start, enum pr_residency_mode mode) { uint16_t param = (uint16_t)(panel_inst << 8); + uint32_t i = 0; switch (mode) { case PR_RESIDENCY_MODE_PHY: @@ -249,10 +252,17 @@ static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst, if (is_start) param |= REPLAY_RESIDENCY_ENABLE; - // Send gpint command and wait for ack - if (!dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__REPLAY_RESIDENCY, param, - residency, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) - *residency = 0; + for (i = 0; i < GPINT_RETRY_NUM; i++) { + // Send gpint command and wait for ack + if (dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__REPLAY_RESIDENCY, param, + residency, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + return; + + udelay(100); + } + + // it means gpint retry many times + *residency = 0; } /* diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 710a25dcfef0..b0d9aed0f265 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -777,6 +777,14 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm * certain cases. Hence do corrective active and disable scaling. */ plane->composition.scaler_info.enabled = false; + } else if ((plane_state->ctx->dc->config.use_spl == true) && + (plane->composition.scaler_info.enabled == false)) { + /* To enable sharpener for 1:1, scaler must be enabled. If use_spl is set, then + * allow case where ratio is 1 but taps > 1 + */ + if ((scaler_data->taps.h_taps > 1) || (scaler_data->taps.v_taps > 1) || + (scaler_data->taps.h_taps_c > 1) || (scaler_data->taps.v_taps_c > 1)) + plane->composition.scaler_info.enabled = true; } /* always_scale is only used for debug purposes not used in production but has to be diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index 1c773bbb9992..eeb96c455658 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -5,7 +5,6 @@ #ifndef __DML_TOP_TYPES_H__ #define __DML_TOP_TYPES_H__ -#include "dml_top_types.h" #include "dml_top_display_cfg_types.h" #include "dml_top_soc_parameter_types.h" #include "dml_top_policy_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 805fd783131f..3ea54fd52e46 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -2085,7 +2085,11 @@ static void CalculateDCCConfiguration( unsigned int full_swath_bytes_vert_wc_l; unsigned int full_swath_bytes_vert_wc_c; - yuv420 = dml_is_420(SourcePixelFormat); + if (dml_is_420(SourcePixelFormat)) + yuv420 = 1; + else + yuv420 = 0; + horz_div_l = 1; horz_div_c = 1; vert_div_l = 1; @@ -2553,8 +2557,11 @@ static void calculate_mcache_setting( l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2; // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c: - l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l; - if (l->is_dual_plane) { + if (*p->num_mcaches_l) { + l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l; + } + + if (l->is_dual_plane && *p->num_mcaches_c) { l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c; if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) { @@ -2683,9 +2690,9 @@ static double dml_get_return_bandwidth_available( double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes; double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes; - double derate_sdp_factor = 1; - double derate_fabric_factor = 1; - double derate_dram_factor = 1; + double derate_sdp_factor; + double derate_fabric_factor; + double derate_dram_factor; double derate_sdp_bandwidth; double derate_fabric_bandwidth; @@ -5056,6 +5063,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->trip_to_mem = 0.0; *p->Tvm_trips = 0.0; *p->Tr0_trips = 0.0; + s->Tvm_no_trip_oto = 0.0; + s->Tr0_no_trip_oto = 0.0; s->Tvm_trips_rounded = 0.0; s->Tr0_trips_rounded = 0.0; s->max_Tsw = 0.0; @@ -5293,31 +5302,38 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; if (p->display_cfg->gpuvm_enable == true) { - s->Tvm_oto = math_max3( - *p->Tvm_trips, + s->Tvm_no_trip_oto = math_max2( *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto, s->LineTime / 4.0); + s->Tvm_oto = math_max2( + *p->Tvm_trips, + s->Tvm_no_trip_oto); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); #endif } else { + s->Tvm_no_trip_oto = s->Tvm_trips_rounded; s->Tvm_oto = s->Tvm_trips_rounded; } if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { - s->Tr0_oto = math_max3( - *p->Tr0_trips, + s->Tr0_no_trip_oto = math_max2( (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, s->LineTime / 4.0); + s->Tr0_oto = math_max2( + *p->Tr0_trips, + s->Tr0_no_trip_oto); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); #endif - } else - s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 4.0; + } else { + s->Tr0_no_trip_oto = (s->LineTime - s->Tvm_oto) / 4.0; + s->Tr0_oto = s->Tr0_no_trip_oto; + } s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0; s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0; @@ -5595,6 +5611,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); #endif + // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) + s->Lsw_equ = s->dst_y_prefetch_equ - math_ceil2(4.0 * (s->Tvm_equ + 2 * s->Tr0_equ) / s->LineTime, 1.0) / 4.0; + // Use the more stressful prefetch schedule if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { *p->dst_y_prefetch = s->dst_y_prefetch_oto; @@ -5603,25 +5622,28 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + s->dst_y_per_vm_no_trip_vblank = math_ceil2(4.0 * s->Tvm_no_trip_oto / s->LineTime, 1.0) / 4.0; + s->dst_y_per_row_no_trip_vblank = math_ceil2(4.0 * s->Tr0_no_trip_oto / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__); #endif - } else { *p->dst_y_prefetch = s->dst_y_prefetch_equ; s->TimeForFetchingVM = s->Tvm_equ; s->TimeForFetchingRowInVBlank = s->Tr0_equ; - *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + s->dst_y_per_vm_no_trip_vblank = *p->dst_y_per_vm_vblank; + s->dst_y_per_row_no_trip_vblank = *p->dst_y_per_row_vblank; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); #endif } - // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) - s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw + /* take worst case Lsw to calculate bandwidth requirement regardless of schedule */ + s->LinesToRequestPrefetchPixelData = math_min2(s->Lsw_equ, s->Lsw_oto); // Lsw s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line); *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime); @@ -5741,13 +5763,13 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (vm_bytes == 0) { prefetch_vm_bw = 0; - } else if (*p->dst_y_per_vm_vblank > 0) { + } else if (s->dst_y_per_vm_no_trip_vblank > 0) { #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); #endif - prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); + prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (s->dst_y_per_vm_no_trip_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif @@ -5759,8 +5781,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { prefetch_row_bw = 0; - } else if (*p->dst_y_per_row_vblank > 0) { - prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); + } else if (s->dst_y_per_row_no_trip_vblank > 0) { + prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (s->dst_y_per_row_no_trip_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); @@ -7194,7 +7216,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.WritebackLatencySupport = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && - (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { + (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) { mode_lib->ms.support.WritebackLatencySupport = false; } } @@ -10739,7 +10761,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory, !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? - get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->mp.Dppclk[k]; myPipe->Dispclk = mode_lib->mp.Dispclk; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 13961c2eb634..cbdfbd5a0bde 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -1187,11 +1187,15 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals { double prefetch_bw_oto; double Tvm_oto; double Tr0_oto; + double Tvm_no_trip_oto; + double Tr0_no_trip_oto; double Tvm_oto_lines; double Tr0_oto_lines; double dst_y_prefetch_oto; double TimeForFetchingVM; double TimeForFetchingRowInVBlank; + double dst_y_per_vm_no_trip_vblank; + double dst_y_per_row_no_trip_vblank; double LinesToRequestPrefetchPixelData; unsigned int HostVMDynamicLevelsTrips; double trip_to_mem; @@ -1199,6 +1203,7 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals { double Tr0_trips_rounded; double max_Tsw; double Lsw_oto; + double Lsw_equ; double Tpre_rounded; double prefetch_bw_equ; double Tvm_equ; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index 8e68a8094658..a31db5742675 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -497,7 +497,6 @@ bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in in_out->cfg_support_info->plane_support_info[i].dpps_used)) { result = false; } else { - free_pipes -= planes_on_stream; break; } } else { @@ -666,7 +665,7 @@ bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_su struct dml2_pmo_instance *pmo = in_out->instance; unsigned int stream_index; bool success = false; - bool reached_end = true; + bool reached_end; memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 30767f330fd4..d63558ee3135 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -334,7 +334,6 @@ bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_o in_out->cfg_support_info->plane_support_info[i].dpps_used)) { result = false; } else { - free_pipes -= planes_on_stream; break; } } else { @@ -672,8 +671,6 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) /* populate list */ expand_base_strategies(pmo, base_strategy_list_4_display, base_strategy_list_4_display_size, 4); break; - default: - break; } } @@ -944,7 +941,8 @@ static void build_synchronized_timing_groups( for (j = i + 1; j < display_config->display_config.num_streams; j++) { if (memcmp(master_timing, &display_config->display_config.stream_descriptors[j].timing, - sizeof(struct dml2_timing_cfg)) == 0) { + sizeof(struct dml2_timing_cfg)) == 0 && + display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder) { set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); set_bit_in_bitfield(&stream_mapped_mask, j); } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c index dc8af4dd0410..d0e026d981b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c @@ -219,7 +219,6 @@ bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency; lowest_state = 0; - cur_state = 0; while (highest_state > lowest_state) { cur_state = (highest_state + lowest_state) / 2; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h index 3ba184be25d3..140ec01545db 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h @@ -101,7 +101,7 @@ struct dml2_wrapper_scratch { struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; bool enable_flexible_pipe_mapping; bool plane_duplicate_exists; - unsigned int dp2_mst_stream_count; + int hpo_stream_to_link_encoder_mapping[MAX_HPO_DP2_ENCODERS]; }; struct dml2_helper_det_policy_scratch { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 7e39873832bf..bde4250853b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -733,8 +733,7 @@ static void populate_dml_timing_cfg_from_stream_state(struct dml_timing_cfg_st * } static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *out, unsigned int location, - const struct dc_stream_state *in, const struct pipe_ctx *pipe, - unsigned int dp2_mst_stream_count) + const struct dc_stream_state *in, const struct pipe_ctx *pipe, struct dml2_context *dml2) { unsigned int output_bpc; @@ -747,8 +746,8 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_DISPLAY_PORT: out->OutputEncoder[location] = dml_dp; - if (is_dp2p0_output_encoder(pipe, dp2_mst_stream_count)) - out->OutputEncoder[location] = dml_dp2p0; + if (dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) + out->OutputEncoder[dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location]] = dml_dp2p0; break; case SIGNAL_TYPE_EDP: out->OutputEncoder[location] = dml_edp; @@ -1199,36 +1198,6 @@ static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, } } -static unsigned int calculate_dp2_mst_stream_count(struct dc_state *context) -{ - int i, j; - unsigned int dp2_mst_stream_count = 0; - - for (i = 0; i < context->stream_count; i++) { - struct dc_stream_state *stream = context->streams[i]; - - if (!stream || stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST) - continue; - - for (j = 0; j < MAX_PIPES; j++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; - - if (!pipe_ctx || !pipe_ctx->stream) - continue; - - if (stream != pipe_ctx->stream) - continue; - - if (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc) { - dp2_mst_stream_count++; - break; - } - } - } - - return dp2_mst_stream_count; -} - static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cfg_st *out, unsigned int location, const struct dc_stream_state *in) { @@ -1269,6 +1238,30 @@ static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cf } } } + +static void dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(struct dml2_context *dml2, struct dc_state *context) +{ + int i; + struct pipe_ctx *current_pipe_context; + + /* Scratch gets reset to zero in dml, but link encoder instance can be zero, so reset to -1 */ + for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { + dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[i] = -1; + } + + /* If an HPO stream encoder is allocated to a pipe, get the instance of it's allocated HPO Link encoder */ + for (i = 0; i < MAX_PIPES; i++) { + current_pipe_context = &context->res_ctx.pipe_ctx[i]; + if (current_pipe_context->stream && + current_pipe_context->stream_res.hpo_dp_stream_enc && + current_pipe_context->link_res.hpo_dp_link_enc && + dc_is_dp_signal(current_pipe_context->stream->signal)) { + dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[current_pipe_context->stream_res.hpo_dp_stream_enc->inst] = + current_pipe_context->link_res.hpo_dp_link_enc->inst; + } + } +} + void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg) { int i = 0, j = 0, k = 0; @@ -1291,8 +1284,8 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (dml2->v20.dml_core_ctx.ip.hostvm_enable) dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter; - dml2->v20.scratch.dp2_mst_stream_count = calculate_dp2_mst_stream_count(context); dml2_populate_pipe_to_plane_index_mapping(dml2, context); + dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(dml2, context); for (i = 0; i < context->stream_count; i++) { current_pipe_context = NULL; @@ -1313,7 +1306,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2); /*Call site for populate_dml_writeback_cfg_from_stream_state*/ populate_dml_writeback_cfg_from_stream_state(&dml_dispcfg->writeback, disp_cfg_stream_location, context->streams[i]); @@ -1378,7 +1371,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (j >= 1) { populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context, dml2); switch (context->streams[i]->debug.force_odm_combine_segments) { case 2: dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h index 55659b22d87f..d764773938f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h @@ -36,6 +36,6 @@ void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states); void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out); -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe, unsigned int dp2_mst_stream_count); +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe); #endif //__DML2_TRANSLATION_HELPER_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 9e8ff3a9718e..9a33158b63bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -153,7 +153,7 @@ unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, e } } -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx, unsigned int dp2_mst_stream_count) +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) { if (pipe_ctx == NULL || pipe_ctx->stream == NULL) return false; @@ -161,14 +161,6 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx, unsigned int dp2_m /* If this assert is hit then we have a link encoder dynamic management issue */ ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); - /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ - if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0] && dp2_mst_stream_count > 1) { - return (pipe_ctx->stream_res.hpo_dp_stream_enc && - pipe_ctx->link_res.hpo_dp_link_enc && - dc_is_dp_signal(pipe_ctx->stream->signal) && - (pipe_ctx->stream->link->remote_sinks[0]->sink_id == pipe_ctx->stream->sink->sink_id)); - } - return (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc && dc_is_dp_signal(pipe_ctx->stream->signal)); @@ -181,7 +173,7 @@ bool is_dtbclk_required(const struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; - if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i], context->bw_ctx.dml2->v20.scratch.dp2_mst_stream_count)) + if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i])) return true; } return false; diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 505929800426..01f98139292e 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -280,7 +280,8 @@ static void dpp401_dscl_set_scaler_filter( static void dpp401_dscl_set_scl_filter( struct dcn401_dpp *dpp, const struct scaler_data *scl_data, - bool chroma_coef_mode) + bool chroma_coef_mode, + bool force_coeffs_update) { bool h_2tap_hardcode_coef_en = false; bool v_2tap_hardcode_coef_en = false; @@ -343,7 +344,7 @@ static void dpp401_dscl_set_scl_filter( || (filter_v_c && (filter_v_c != dpp->filter_v_c)); } - if (filter_updated) { + if ((filter_updated) || (force_coeffs_update)) { uint32_t scl_mode = REG_READ(SCL_MODE); if (!h_2tap_hardcode_coef_en && filter_h) { @@ -656,274 +657,252 @@ static void dpp401_dscl_set_recout(struct dcn401_dpp *dpp, RECOUT_HEIGHT, recout->height); } /** - * dpp401_dscl_program_easf - Program EASF + * dpp401_dscl_program_easf_v - Program EASF_V * * @dpp_base: High level DPP struct * @scl_data: scalaer_data info * - * This is the primary function to program EASF + * This is the primary function to program vertical EASF registers * */ -static void dpp401_dscl_program_easf(struct dpp *dpp_base, const struct scaler_data *scl_data) +static void dpp401_dscl_program_easf_v(struct dpp *dpp_base, const struct scaler_data *scl_data) { struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); PERF_TRACE(); - REG_UPDATE(DSCL_SC_MODE, - SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode); - REG_UPDATE(DSCL_SC_MODE, - SCL_SC_LTONL_EN, scl_data->dscl_prog_data.easf_ltonl_en); /* DSCL_EASF_V_MODE */ - REG_UPDATE(DSCL_EASF_V_MODE, - SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en); - REG_UPDATE(DSCL_EASF_V_MODE, - SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor); - REG_UPDATE(DSCL_EASF_V_MODE, + REG_SET_3(DSCL_EASF_V_MODE, 0, + SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en, + SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor, SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, + + if (!scl_data->dscl_prog_data.easf_v_en) { + PERF_TRACE(); + return; + } + + /* DSCL_EASF_V_BF_CNTL */ + REG_SET_6(DSCL_EASF_V_BF_CNTL, 0, + SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en, + SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode, + SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode, + SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain, + SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain, SCL_EASF_V_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_v_bf2_roc_gain); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, - SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, + /* DSCL_EASF_V_RINGEST_3TAP_CNTLn */ + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL1, 0, + SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt, SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, - SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL2, 0, + SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope, SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, - SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL3, 0, + SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope, SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, - SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, + /* DSCL_EASF_V_RINGEST_EVENTAP_REDUCE */ + REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, 0, + SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1, SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, - SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, + /* DSCL_EASF_V_RINGEST_EVENTAP_GAIN */ + REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, 0, + SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1, SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, + /* DSCL_EASF_V_BF_FINAL_MAX_MIN */ + REG_SET_4(DSCL_EASF_V_BF_FINAL_MAX_MIN, 0, + SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa, + SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb, + SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina, SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, - SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, - SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, + /* DSCL_EASF_V_BF1_PWL_SEGn */ + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG0, 0, + SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0, + SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0, SCL_EASF_V_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, - SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, - SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG1, 0, + SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1, + SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1, SCL_EASF_V_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg1); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, - SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, - SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG2, 0, + SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2, + SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2, SCL_EASF_V_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, - SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, - SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG3, 0, + SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3, + SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3, SCL_EASF_V_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, - SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, - SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG4, 0, + SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4, + SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4, SCL_EASF_V_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, - SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, - SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG5, 0, + SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5, + SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5, SCL_EASF_V_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, - SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, - SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG6, 0, + SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6, + SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6, SCL_EASF_V_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7, - SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7, + REG_SET_2(DSCL_EASF_V_BF1_PWL_SEG7, 0, + SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7, SCL_EASF_V_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg7); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, - SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, - SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, + /* DSCL_EASF_V_BF3_PWL_SEGn */ + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG0, 0, + SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0, + SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0, SCL_EASF_V_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, - SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, - SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG1, 0, + SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1, + SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1, SCL_EASF_V_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, - SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, - SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG2, 0, + SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2, + SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2, SCL_EASF_V_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, - SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, - SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG3, 0, + SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3, + SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3, SCL_EASF_V_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set3); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, - SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, - SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG4, 0, + SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4, + SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4, SCL_EASF_V_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5, - SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5, + REG_SET_2(DSCL_EASF_V_BF3_PWL_SEG5, 0, + SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5, SCL_EASF_V_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set5); + PERF_TRACE(); +} +/** + * dpp401_dscl_program_easf_h - Program EASF_H + * + * @dpp_base: High level DPP struct + * @scl_data: scalaer_data info + * + * This is the primary function to program horizontal EASF registers + * + */ +static void dpp401_dscl_program_easf_h(struct dpp *dpp_base, const struct scaler_data *scl_data) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + PERF_TRACE(); /* DSCL_EASF_H_MODE */ - REG_UPDATE(DSCL_EASF_H_MODE, - SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en); - REG_UPDATE(DSCL_EASF_H_MODE, - SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor); - REG_UPDATE(DSCL_EASF_H_MODE, + REG_SET_3(DSCL_EASF_H_MODE, 0, + SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en, + SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor, SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, + + if (!scl_data->dscl_prog_data.easf_h_en) { + PERF_TRACE(); + return; + } + + /* DSCL_EASF_H_BF_CNTL */ + REG_SET_6(DSCL_EASF_H_BF_CNTL, 0, + SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en, + SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode, + SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode, + SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain, + SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain, SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, - SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, + /* DSCL_EASF_H_RINGEST_EVENTAP_REDUCE */ + REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, 0, + SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1, SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, - SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, + /* DSCL_EASF_H_RINGEST_EVENTAP_GAIN */ + REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, 0, + SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1, SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, + /* DSCL_EASF_H_BF_FINAL_MAX_MIN */ + REG_SET_4(DSCL_EASF_H_BF_FINAL_MAX_MIN, 0, + SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa, + SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb, + SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina, SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, - SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, - SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, + /* DSCL_EASF_H_BF1_PWL_SEGn */ + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG0, 0, + SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0, + SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0, SCL_EASF_H_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, - SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, - SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG1, 0, + SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1, + SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1, SCL_EASF_H_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, - SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, - SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG2, 0, + SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2, + SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2, SCL_EASF_H_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg2); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, - SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, - SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG3, 0, + SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3, + SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3, SCL_EASF_H_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, - SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, - SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG4, 0, + SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4, + SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4, SCL_EASF_H_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, - SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, - SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG5, 0, + SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5, + SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5, SCL_EASF_H_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, - SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, - SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG6, 0, + SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6, + SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6, SCL_EASF_H_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7, - SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7, + REG_SET_2(DSCL_EASF_H_BF1_PWL_SEG7, 0, + SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7, SCL_EASF_H_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg7); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, - SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, - SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, + /* DSCL_EASF_H_BF3_PWL_SEGn */ + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG0, 0, + SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0, + SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0, SCL_EASF_H_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, - SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, - SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG1, 0, + SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1, + SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1, SCL_EASF_H_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, - SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, - SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG2, 0, + SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2, + SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2, SCL_EASF_H_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, - SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, - SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG3, 0, + SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3, + SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3, SCL_EASF_H_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, - SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, - SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG4, 0, + SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4, + SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4, SCL_EASF_H_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set4); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5, - SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5, + REG_SET_2(DSCL_EASF_H_BF3_PWL_SEG5, 0, + SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5, SCL_EASF_H_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set5); + PERF_TRACE(); +} +/** + * dpp401_dscl_program_easf - Program EASF + * + * @dpp_base: High level DPP struct + * @scl_data: scalaer_data info + * + * This is the primary function to program EASF + * + */ +static void dpp401_dscl_program_easf(struct dpp *dpp_base, const struct scaler_data *scl_data) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + PERF_TRACE(); + /* DSCL_SC_MODE */ + REG_SET_2(DSCL_SC_MODE, 0, + SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode, + SCL_SC_LTONL_EN, scl_data->dscl_prog_data.easf_ltonl_en); /* DSCL_EASF_SC_MATRIX_C0C1, DSCL_EASF_SC_MATRIX_C2C3 */ - REG_UPDATE(DSCL_SC_MATRIX_C0C1, - SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0); - REG_UPDATE(DSCL_SC_MATRIX_C0C1, + REG_SET_2(DSCL_SC_MATRIX_C0C1, 0, + SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0, SCL_SC_MATRIX_C1, scl_data->dscl_prog_data.easf_matrix_c1); - REG_UPDATE(DSCL_SC_MATRIX_C2C3, - SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2); - REG_UPDATE(DSCL_SC_MATRIX_C2C3, + REG_SET_2(DSCL_SC_MATRIX_C2C3, 0, + SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2, SCL_SC_MATRIX_C3, scl_data->dscl_prog_data.easf_matrix_c3); + dpp401_dscl_program_easf_v(dpp_base, scl_data); + dpp401_dscl_program_easf_h(dpp_base, scl_data); PERF_TRACE(); } /** @@ -958,10 +937,11 @@ static void dpp401_dscl_set_isharp_filter( REG_UPDATE(ISHARP_DELTA_CTRL, ISHARP_DELTA_LUT_HOST_SELECT, 0); + /* LUT data write is auto-indexed. Write index once */ + REG_SET(ISHARP_DELTA_INDEX, 0, + ISHARP_DELTA_INDEX, 0); for (level = 0; level < NUM_LEVELS; level++) { filter_data = filter[level]; - REG_SET(ISHARP_DELTA_INDEX, 0, - ISHARP_DELTA_INDEX, level); REG_SET(ISHARP_DELTA_DATA, 0, ISHARP_DELTA_DATA, filter_data); } @@ -976,107 +956,76 @@ static void dpp401_dscl_set_isharp_filter( * */ static void dpp401_dscl_program_isharp(struct dpp *dpp_base, - const struct scaler_data *scl_data) + const struct scaler_data *scl_data, + bool program_isharp_1dlut, + bool *bs_coeffs_updated) { struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + *bs_coeffs_updated = false; PERF_TRACE(); - /* ISHARP_EN */ - REG_UPDATE(ISHARP_MODE, - ISHARP_EN, scl_data->dscl_prog_data.isharp_en); - /* ISHARP_NOISEDET_EN */ - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable); - /* ISHARP_NOISEDET_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); - /* ISHARP_NOISEDET_UTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); - /* ISHARP_NOISEDET_DTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold); - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); - /* ISHARP_NOISEDET_UTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); - /* ISHARP_NOISEDET_DTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, + /* ISHARP_MODE */ + REG_SET_6(ISHARP_MODE, 0, + ISHARP_EN, scl_data->dscl_prog_data.isharp_en, + ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable, + ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode, + ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode, + ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode, + ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm); + + /* Skip remaining register programming if ISHARP is disabled */ + if (!scl_data->dscl_prog_data.isharp_en) { + PERF_TRACE(); + return; + } + + /* ISHARP_NOISEDET_THRESHOLD */ + REG_SET_2(ISHARP_NOISEDET_THRESHOLD, 0, + ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold, ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold); - /* ISHARP_NOISEDET_PWL_START_IN */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, - ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in); - /* ISHARP_NOISEDET_PWL_END_IN */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, - ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in); - /* ISHARP_NOISEDET_PWL_SLOPE */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, + + /* ISHARP_NOISE_GAIN_PWL */ + REG_SET_3(ISHARP_NOISE_GAIN_PWL, 0, + ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in, + ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in, ISHARP_NOISEDET_PWL_SLOPE, scl_data->dscl_prog_data.isharp_noise_det.pwl_slope); - /* ISHARP_LBA_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode); + /* ISHARP_LBA: IN_SEG, BASE_SEG, SLOPE_SEG */ - REG_UPDATE(ISHARP_LBA_PWL_SEG0, - ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG0, - ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG0, + REG_SET_3(ISHARP_LBA_PWL_SEG0, 0, + ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0], + ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0], ISHARP_LBA_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.isharp_lba.slope_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, - ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, - ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, + REG_SET_3(ISHARP_LBA_PWL_SEG1, 0, + ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1], + ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1], ISHARP_LBA_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.isharp_lba.slope_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, - ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, - ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, + REG_SET_3(ISHARP_LBA_PWL_SEG2, 0, + ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2], + ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2], ISHARP_LBA_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.isharp_lba.slope_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, - ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, - ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, + REG_SET_3(ISHARP_LBA_PWL_SEG3, 0, + ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3], + ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3], ISHARP_LBA_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.isharp_lba.slope_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, - ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, - ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, + REG_SET_3(ISHARP_LBA_PWL_SEG4, 0, + ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4], + ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4], ISHARP_LBA_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.isharp_lba.slope_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG5, - ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5]); - REG_UPDATE(ISHARP_LBA_PWL_SEG5, + REG_SET_2(ISHARP_LBA_PWL_SEG5, 0, + ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5], ISHARP_LBA_PWL_BASE_SEG5, scl_data->dscl_prog_data.isharp_lba.base_seg[5]); - /* ISHARP_FMT_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode); - /* ISHARP_FMT_NORM */ - REG_UPDATE(ISHARP_MODE, - ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm); /* ISHARP_DELTA_LUT */ - dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta); - /* ISHARP_NLDELTA_SCLIP_EN_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p); - /* ISHARP_NLDELTA_SCLIP_PIVOT_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p); - /* ISHARP_NLDELTA_SCLIP_SLOPE_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p); - /* ISHARP_NLDELTA_SCLIP_EN_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n); - /* ISHARP_NLDELTA_SCLIP_PIVOT_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n); - /* ISHARP_NLDELTA_SCLIP_SLOPE_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, + if (!program_isharp_1dlut) + dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta); + + /* ISHARP_NLDELTA_SOFT_CLIP */ + REG_SET_6(ISHARP_NLDELTA_SOFT_CLIP, 0, + ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p, + ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p, + ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p, + ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n, + ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n, ISHARP_NLDELTA_SCLIP_SLOPE_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_n); /* Blur and Scale Coefficients - SCL_COEF_RAM_TAP_SELECT */ @@ -1086,12 +1035,14 @@ static void dpp401_dscl_program_isharp(struct dpp *dpp_base, dpp, scl_data->taps.v_taps, SCL_COEF_VERTICAL_BLUR_SCALE, scl_data->dscl_prog_data.filter_blur_scale_v); + *bs_coeffs_updated = true; } if (scl_data->dscl_prog_data.filter_blur_scale_h) { dpp401_dscl_set_scaler_filter( dpp, scl_data->taps.h_taps, SCL_COEF_HORIZONTAL_BLUR_SCALE, scl_data->dscl_prog_data.filter_blur_scale_h); + *bs_coeffs_updated = true; } } PERF_TRACE(); @@ -1122,12 +1073,29 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale); bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN && scl_data->format <= PIXEL_FORMAT_VIDEO_END; + bool program_isharp_1dlut = false; + bool bs_coeffs_updated = false; + if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0) return; PERF_TRACE(); + /* If only sharpness has changed, then only update 1dlut, then return */ + if (scl_data->dscl_prog_data.isharp_en && + (dpp->scl_data.dscl_prog_data.sharpness_level + != scl_data->dscl_prog_data.sharpness_level)) { + /* ISHARP_DELTA_LUT */ + dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta); + dpp->scl_data.dscl_prog_data.sharpness_level = scl_data->dscl_prog_data.sharpness_level; + dpp->scl_data.dscl_prog_data.isharp_delta = scl_data->dscl_prog_data.isharp_delta; + + if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0) + return; + program_isharp_1dlut = true; + } + dpp->scl_data = *scl_data; if ((dpp->base.ctx->dc->config.use_spl) && (!dpp->base.ctx->dc->debug.disable_spl)) { @@ -1181,7 +1149,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, if (dscl_mode == DSCL_MODE_SCALING_444_BYPASS) { if (dpp->base.ctx->dc->config.prefer_easf) dpp401_dscl_disable_easf(dpp_base, scl_data); - dpp401_dscl_program_isharp(dpp_base, scl_data); + dpp401_dscl_program_isharp(dpp_base, scl_data, program_isharp_1dlut, &bs_coeffs_updated); return; } @@ -1208,12 +1176,18 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, SCL_V_NUM_TAPS_C, v_num_taps_c, SCL_H_NUM_TAPS_C, h_num_taps_c); - dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr); + /* ISharp configuration + * - B&S coeffs are written to same coeff RAM as WB scaler coeffs + * - coeff RAM toggle is in EASF programming + * - if we are only programming B&S coeffs, then need to reprogram + * WB scaler coeffs and toggle coeff RAM together + */ + //if (dpp->base.ctx->dc->config.prefer_easf) + dpp401_dscl_program_isharp(dpp_base, scl_data, program_isharp_1dlut, &bs_coeffs_updated); + + dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr, bs_coeffs_updated); /* Edge adaptive scaler function configuration */ if (dpp->base.ctx->dc->config.prefer_easf) dpp401_dscl_program_easf(dpp_base, scl_data); - /* isharp configuration */ - //if (dpp->base.ctx->dc->config.prefer_easf) - dpp401_dscl_program_isharp(dpp_base, scl_data); PERF_TRACE(); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index fbbb20b9dbee..a4c6decee0f8 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -147,37 +147,6 @@ void dcn35_init_hw(struct dc *dc) hws->funcs.bios_golden_init(dc); } - if (!dc->debug.disable_clock_gate) { - REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); - - /* Disable gating for PHYASYMCLK. This will be enabled in dccg if needed */ - REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1, - PHYBSYMCLK_ROOT_GATE_DISABLE, 1, - PHYCSYMCLK_ROOT_GATE_DISABLE, 1, - PHYDSYMCLK_ROOT_GATE_DISABLE, 1, - PHYESYMCLK_ROOT_GATE_DISABLE, 1); - - REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL4, - DPIASYMCLK0_GATE_DISABLE, 0, - DPIASYMCLK1_GATE_DISABLE, 0, - DPIASYMCLK2_GATE_DISABLE, 0, - DPIASYMCLK3_GATE_DISABLE, 0); - - REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0xFFFFFFFF); - REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5, - DTBCLK_P0_GATE_DISABLE, 0, - DTBCLK_P1_GATE_DISABLE, 0, - DTBCLK_P2_GATE_DISABLE, 0, - DTBCLK_P3_GATE_DISABLE, 0); - REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5, - DPSTREAMCLK0_GATE_DISABLE, 0, - DPSTREAMCLK1_GATE_DISABLE, 0, - DPSTREAMCLK2_GATE_DISABLE, 0, - DPSTREAMCLK3_GATE_DISABLE, 0); - - } - // Initialize the dccg if (res_pool->dccg->funcs->dccg_init) res_pool->dccg->funcs->dccg_init(res_pool->dccg); @@ -305,20 +274,6 @@ void dcn35_init_hw(struct dc *dc) if (!dc->debug.disable_clock_gate) { /* enable all DCN clock gating */ - REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - - REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, 0, - SYMCLKB_FE_GATE_DISABLE, 0, - SYMCLKC_FE_GATE_DISABLE, 0, - SYMCLKD_FE_GATE_DISABLE, 0, - SYMCLKE_FE_GATE_DISABLE, 0); - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, 0); - REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, 0, - SYMCLKB_GATE_DISABLE, 0, - SYMCLKC_GATE_DISABLE, 0, - SYMCLKD_GATE_DISABLE, 0, - SYMCLKE_GATE_DISABLE, 0); - REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 02e63b95c36d..9d56fbdcd06a 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -76,6 +76,9 @@ #include "dml2/dml2_wrapper.h" +#include "spl/dc_spl_scl_easf_filters.h" +#include "spl/dc_spl_isharp_filters.h" + #define DC_LOGGER_INIT(logger) enum dcn401_clk_src_array_id { @@ -2126,6 +2129,10 @@ static bool dcn401_resource_construct( dc->dml2_options.max_segments_per_hubp = 20; dc->dml2_options.det_segment_size = DCN4_01_CRB_SEGMENT_SIZE_KB; + /* SPL */ + spl_init_easf_filter_coeffs(); + spl_init_blur_scale_coeffs(); + return true; create_fail: diff --git a/drivers/gpu/drm/amd/display/dc/spl/Makefile b/drivers/gpu/drm/amd/display/dc/spl/Makefile index f8df85ea4d32..5edf3c6cf3e2 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/Makefile +++ b/drivers/gpu/drm/amd/display/dc/spl/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'spl' sub-component of DAL. # It provides the scaling library interface. -SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_filters_old.o dc_spl_isharp_filters.o +SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_easf_filters.o dc_spl_isharp_filters.o dc_spl_filters.o spl_fixpt31_32.o spl_custom_float.o AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/spl/,$(SPL)) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 9eccdb38bed4..15f7eda903e6 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -4,9 +4,11 @@ #include "dc_spl.h" #include "dc_spl_scl_filters.h" +#include "dc_spl_scl_easf_filters.h" #include "dc_spl_isharp_filters.h" +#include "spl_debug.h" -#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) +#define IDENTITY_RATIO(ratio) (spl_fixpt_u2d19(ratio) == (1 << 19)) #define MIN_VIEWPORT_SIZE 12 static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1) @@ -107,26 +109,26 @@ static struct spl_rect calculate_plane_rec_in_timing_active( const struct spl_rect *stream_src = &spl_in->basic_out.src_rect; const struct spl_rect *stream_dst = &spl_in->basic_out.dst_rect; struct spl_rect rec_out = {0}; - struct fixed31_32 temp; + struct spl_fixed31_32 temp; - temp = dc_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width, + temp = spl_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width, stream_src->width); - rec_out.x = stream_dst->x + dc_fixpt_round(temp); + rec_out.x = stream_dst->x + spl_fixpt_round(temp); - temp = dc_fixpt_from_fraction( + temp = spl_fixpt_from_fraction( (rec_in->x + rec_in->width) * (long long)stream_dst->width, stream_src->width); - rec_out.width = stream_dst->x + dc_fixpt_round(temp) - rec_out.x; + rec_out.width = stream_dst->x + spl_fixpt_round(temp) - rec_out.x; - temp = dc_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height, + temp = spl_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height, stream_src->height); - rec_out.y = stream_dst->y + dc_fixpt_round(temp); + rec_out.y = stream_dst->y + spl_fixpt_round(temp); - temp = dc_fixpt_from_fraction( + temp = spl_fixpt_from_fraction( (rec_in->y + rec_in->height) * (long long)stream_dst->height, stream_src->height); - rec_out.height = stream_dst->y + dc_fixpt_round(temp) - rec_out.y; + rec_out.height = stream_dst->y + spl_fixpt_round(temp) - rec_out.y; return rec_out; } @@ -144,7 +146,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; mpc_rec.height = plane_clip_rec->height; mpc_rec.y = plane_clip_rec->y; - ASSERT(mpc_slice_count == 1 || + SPL_ASSERT(mpc_slice_count == 1 || spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE || mpc_rec.width % 2 == 0); @@ -157,7 +159,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( } if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) { - ASSERT(mpc_rec.height % 2 == 0); + SPL_ASSERT(mpc_rec.height % 2 == 0); mpc_rec.height /= 2; } return mpc_rec; @@ -197,7 +199,7 @@ static struct spl_rect calculate_odm_slice_in_timing_active(struct spl_in *spl_i return spl_in->basic_out.odm_slice_rect; } -static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_recout(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out) { /* * A plane clip represents the desired plane size and position in Stream @@ -340,20 +342,23 @@ static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out) /* shift the overlapping area so it is with respect to current * ODM slice's position */ - spl_out->scl_data.recout = shift_rec( + spl_scratch->scl_data.recout = shift_rec( &overlapping_area, -odm_slice.x, -odm_slice.y); - spl_out->scl_data.recout.height -= + spl_scratch->scl_data.recout.height -= spl_in->debug.visual_confirm_base_offset; - spl_out->scl_data.recout.height -= + spl_scratch->scl_data.recout.height -= spl_in->debug.visual_confirm_dpp_offset; } else /* if there is no overlap, zero recout */ - memset(&spl_out->scl_data.recout, 0, + memset(&spl_scratch->scl_data.recout, 0, sizeof(struct spl_rect)); } + /* Calculate scaling ratios */ -static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_scaling_ratios(struct spl_in *spl_in, + struct spl_scratch *spl_scratch, + struct spl_out *spl_out) { const int in_w = spl_in->basic_out.src_rect.width; const int in_h = spl_in->basic_out.src_rect.height; @@ -364,59 +369,75 @@ static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out * /*Swap surf_src height and width since scaling ratios are in recout rotation*/ if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) - swap(surf_src.height, surf_src.width); + spl_swap(surf_src.height, surf_src.width); - spl_out->scl_data.ratios.horz = dc_fixpt_from_fraction( + spl_scratch->scl_data.ratios.horz = spl_fixpt_from_fraction( surf_src.width, spl_in->basic_in.dst_rect.width); - spl_out->scl_data.ratios.vert = dc_fixpt_from_fraction( + spl_scratch->scl_data.ratios.vert = spl_fixpt_from_fraction( surf_src.height, spl_in->basic_in.dst_rect.height); if (spl_in->basic_out.view_format == SPL_VIEW_3D_SIDE_BY_SIDE) - spl_out->scl_data.ratios.horz.value *= 2; + spl_scratch->scl_data.ratios.horz.value *= 2; else if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) - spl_out->scl_data.ratios.vert.value *= 2; + spl_scratch->scl_data.ratios.vert.value *= 2; - spl_out->scl_data.ratios.vert.value = div64_s64( - spl_out->scl_data.ratios.vert.value * in_h, out_h); - spl_out->scl_data.ratios.horz.value = div64_s64( - spl_out->scl_data.ratios.horz.value * in_w, out_w); + spl_scratch->scl_data.ratios.vert.value = spl_div64_s64( + spl_scratch->scl_data.ratios.vert.value * in_h, out_h); + spl_scratch->scl_data.ratios.horz.value = spl_div64_s64( + spl_scratch->scl_data.ratios.horz.value * in_w, out_w); - spl_out->scl_data.ratios.horz_c = spl_out->scl_data.ratios.horz; - spl_out->scl_data.ratios.vert_c = spl_out->scl_data.ratios.vert; + spl_scratch->scl_data.ratios.horz_c = spl_scratch->scl_data.ratios.horz; + spl_scratch->scl_data.ratios.vert_c = spl_scratch->scl_data.ratios.vert; if (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) { - spl_out->scl_data.ratios.horz_c.value /= 2; - spl_out->scl_data.ratios.vert_c.value /= 2; + spl_scratch->scl_data.ratios.horz_c.value /= 2; + spl_scratch->scl_data.ratios.vert_c.value /= 2; } - spl_out->scl_data.ratios.horz = dc_fixpt_truncate( - spl_out->scl_data.ratios.horz, 19); - spl_out->scl_data.ratios.vert = dc_fixpt_truncate( - spl_out->scl_data.ratios.vert, 19); - spl_out->scl_data.ratios.horz_c = dc_fixpt_truncate( - spl_out->scl_data.ratios.horz_c, 19); - spl_out->scl_data.ratios.vert_c = dc_fixpt_truncate( - spl_out->scl_data.ratios.vert_c, 19); + spl_scratch->scl_data.ratios.horz = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.horz, 19); + spl_scratch->scl_data.ratios.vert = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.vert, 19); + spl_scratch->scl_data.ratios.horz_c = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.horz_c, 19); + spl_scratch->scl_data.ratios.vert_c = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.vert_c, 19); + + /* + * Coefficient table and some registers are different based on ratio + * that is output/input. Currently we calculate input/output + * Store 1/ratio in recip_ratio for those lookups + */ + spl_scratch->scl_data.recip_ratios.horz = spl_fixpt_recip( + spl_scratch->scl_data.ratios.horz); + spl_scratch->scl_data.recip_ratios.vert = spl_fixpt_recip( + spl_scratch->scl_data.ratios.vert); + spl_scratch->scl_data.recip_ratios.horz_c = spl_fixpt_recip( + spl_scratch->scl_data.ratios.horz_c); + spl_scratch->scl_data.recip_ratios.vert_c = spl_fixpt_recip( + spl_scratch->scl_data.ratios.vert_c); } + /* Calculate Viewport size */ -static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_scratch *spl_scratch) { - spl_out->scl_data.viewport.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz, - spl_out->scl_data.recout.width)); - spl_out->scl_data.viewport.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert, - spl_out->scl_data.recout.height)); - spl_out->scl_data.viewport_c.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz_c, - spl_out->scl_data.recout.width)); - spl_out->scl_data.viewport_c.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert_c, - spl_out->scl_data.recout.height)); + spl_scratch->scl_data.viewport.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz, + spl_scratch->scl_data.recout.width)); + spl_scratch->scl_data.viewport.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert, + spl_scratch->scl_data.recout.height)); + spl_scratch->scl_data.viewport_c.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz_c, + spl_scratch->scl_data.recout.width)); + spl_scratch->scl_data.viewport_c.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert_c, + spl_scratch->scl_data.recout.height)); if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) { - swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); - swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); + spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height); + spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height); } } + static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation, bool horizontal_mirror, bool *orthogonal_rotation, @@ -440,6 +461,7 @@ static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation, if (horizontal_mirror) *flip_horz_scan_dir = !*flip_horz_scan_dir; } + /* * We completely calculate vp offset, size and inits here based entirely on scaling * ratios and recout for pixel perfect pipe combine. @@ -449,13 +471,13 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, int recout_size, int src_size, int taps, - struct fixed31_32 ratio, - struct fixed31_32 init_adj, - struct fixed31_32 *init, + struct spl_fixed31_32 ratio, + struct spl_fixed31_32 init_adj, + struct spl_fixed31_32 *init, int *vp_offset, int *vp_size) { - struct fixed31_32 temp; + struct spl_fixed31_32 temp; int int_part; /* @@ -468,33 +490,33 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, * init_bot = init + scaling_ratio * to get pixel perfect combine add the fraction from calculating vp offset */ - temp = dc_fixpt_mul_int(ratio, recout_offset_within_recout_full); - *vp_offset = dc_fixpt_floor(temp); + temp = spl_fixpt_mul_int(ratio, recout_offset_within_recout_full); + *vp_offset = spl_fixpt_floor(temp); temp.value &= 0xffffffff; - *init = dc_fixpt_add(dc_fixpt_div_int(dc_fixpt_add_int(ratio, taps + 1), 2), temp); - *init = dc_fixpt_add(*init, init_adj); - *init = dc_fixpt_truncate(*init, 19); + *init = spl_fixpt_add(spl_fixpt_div_int(spl_fixpt_add_int(ratio, taps + 1), 2), temp); + *init = spl_fixpt_add(*init, init_adj); + *init = spl_fixpt_truncate(*init, 19); /* * If viewport has non 0 offset and there are more taps than covered by init then * we should decrease the offset and increase init so we are never sampling * outside of viewport. */ - int_part = dc_fixpt_floor(*init); + int_part = spl_fixpt_floor(*init); if (int_part < taps) { int_part = taps - int_part; if (int_part > *vp_offset) int_part = *vp_offset; *vp_offset -= int_part; - *init = dc_fixpt_add_int(*init, int_part); + *init = spl_fixpt_add_int(*init, int_part); } /* * If taps are sampling outside of viewport at end of recout and there are more pixels * available in the surface we should increase the viewport size, regardless set vp to * only what is used. */ - temp = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_size - 1)); - *vp_size = dc_fixpt_floor(temp); + temp = spl_fixpt_add(*init, spl_fixpt_mul_int(ratio, recout_size - 1)); + *vp_size = spl_fixpt_floor(temp); if (*vp_size + *vp_offset > src_size) *vp_size = src_size - *vp_offset; @@ -509,15 +531,24 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, static bool spl_is_yuv420(enum spl_pixel_format format) { - if ((format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN) && - (format <= SPL_PIXEL_FORMAT_VIDEO_END)) + if ((format >= SPL_PIXEL_FORMAT_420BPP8) && + (format <= SPL_PIXEL_FORMAT_420BPP10)) + return true; + + return false; +} + +static bool spl_is_rgb8(enum spl_pixel_format format) +{ + if (format == SPL_PIXEL_FORMAT_ARGB8888) return true; return false; } /*Calculate inits and viewport */ -static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, + struct spl_scratch *spl_scratch) { struct spl_rect src = spl_in->basic_in.src_rect; struct spl_rect recout_dst_in_active_timing; @@ -528,11 +559,11 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ int vpc_div = (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) ? 2 : 1; bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir; - struct fixed31_32 init_adj_h = dc_fixpt_zero; - struct fixed31_32 init_adj_v = dc_fixpt_zero; + struct spl_fixed31_32 init_adj_h = spl_fixpt_zero; + struct spl_fixed31_32 init_adj_v = spl_fixpt_zero; recout_clip_in_active_timing = shift_rec( - &spl_out->scl_data.recout, odm_slice.x, odm_slice.y); + &spl_scratch->scl_data.recout, odm_slice.x, odm_slice.y); recout_dst_in_active_timing = calculate_plane_rec_in_timing_active( spl_in, &spl_in->basic_in.dst_rect); overlap_in_active_timing = intersect_rec(&recout_clip_in_active_timing, @@ -555,8 +586,8 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ &flip_horz_scan_dir); if (orthogonal_rotation) { - swap(src.width, src.height); - swap(flip_vert_scan_dir, flip_horz_scan_dir); + spl_swap(src.width, src.height); + spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); } if (spl_is_yuv420(spl_in->basic_in.format)) { @@ -568,17 +599,17 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ switch (spl_in->basic_in.cositing) { case CHROMA_COSITING_LEFT: - init_adj_h = dc_fixpt_zero; - init_adj_v = dc_fixpt_from_fraction(sign, 2); + init_adj_h = spl_fixpt_zero; + init_adj_v = spl_fixpt_from_fraction(sign, 4); break; case CHROMA_COSITING_NONE: - init_adj_h = dc_fixpt_from_fraction(sign, 2); - init_adj_v = dc_fixpt_from_fraction(sign, 2); + init_adj_h = spl_fixpt_from_fraction(sign, 4); + init_adj_v = spl_fixpt_from_fraction(sign, 4); break; case CHROMA_COSITING_TOPLEFT: default: - init_adj_h = dc_fixpt_zero; - init_adj_v = dc_fixpt_zero; + init_adj_h = spl_fixpt_zero; + init_adj_v = spl_fixpt_zero; break; } } @@ -586,59 +617,60 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, - spl_out->scl_data.recout.width, + spl_scratch->scl_data.recout.width, src.width, - spl_out->scl_data.taps.h_taps, - spl_out->scl_data.ratios.horz, - dc_fixpt_zero, - &spl_out->scl_data.inits.h, - &spl_out->scl_data.viewport.x, - &spl_out->scl_data.viewport.width); + spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.ratios.horz, + spl_fixpt_zero, + &spl_scratch->scl_data.inits.h, + &spl_scratch->scl_data.viewport.x, + &spl_scratch->scl_data.viewport.width); spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, - spl_out->scl_data.recout.width, + spl_scratch->scl_data.recout.width, src.width / vpc_div, - spl_out->scl_data.taps.h_taps_c, - spl_out->scl_data.ratios.horz_c, + spl_scratch->scl_data.taps.h_taps_c, + spl_scratch->scl_data.ratios.horz_c, init_adj_h, - &spl_out->scl_data.inits.h_c, - &spl_out->scl_data.viewport_c.x, - &spl_out->scl_data.viewport_c.width); + &spl_scratch->scl_data.inits.h_c, + &spl_scratch->scl_data.viewport_c.x, + &spl_scratch->scl_data.viewport_c.width); spl_calculate_init_and_vp( flip_vert_scan_dir, recout_clip_in_recout_dst.y, - spl_out->scl_data.recout.height, + spl_scratch->scl_data.recout.height, src.height, - spl_out->scl_data.taps.v_taps, - spl_out->scl_data.ratios.vert, - dc_fixpt_zero, - &spl_out->scl_data.inits.v, - &spl_out->scl_data.viewport.y, - &spl_out->scl_data.viewport.height); + spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.ratios.vert, + spl_fixpt_zero, + &spl_scratch->scl_data.inits.v, + &spl_scratch->scl_data.viewport.y, + &spl_scratch->scl_data.viewport.height); spl_calculate_init_and_vp( flip_vert_scan_dir, recout_clip_in_recout_dst.y, - spl_out->scl_data.recout.height, + spl_scratch->scl_data.recout.height, src.height / vpc_div, - spl_out->scl_data.taps.v_taps_c, - spl_out->scl_data.ratios.vert_c, + spl_scratch->scl_data.taps.v_taps_c, + spl_scratch->scl_data.ratios.vert_c, init_adj_v, - &spl_out->scl_data.inits.v_c, - &spl_out->scl_data.viewport_c.y, - &spl_out->scl_data.viewport_c.height); + &spl_scratch->scl_data.inits.v_c, + &spl_scratch->scl_data.viewport_c.y, + &spl_scratch->scl_data.viewport_c.height); if (orthogonal_rotation) { - swap(spl_out->scl_data.viewport.x, spl_out->scl_data.viewport.y); - swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); - swap(spl_out->scl_data.viewport_c.x, spl_out->scl_data.viewport_c.y); - swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); + spl_swap(spl_scratch->scl_data.viewport.x, spl_scratch->scl_data.viewport.y); + spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height); + spl_swap(spl_scratch->scl_data.viewport_c.x, spl_scratch->scl_data.viewport_c.y); + spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height); } - spl_out->scl_data.viewport.x += src.x; - spl_out->scl_data.viewport.y += src.y; - ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); - spl_out->scl_data.viewport_c.x += src.x / vpc_div; - spl_out->scl_data.viewport_c.y += src.y / vpc_div; + spl_scratch->scl_data.viewport.x += src.x; + spl_scratch->scl_data.viewport.y += src.y; + SPL_ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); + spl_scratch->scl_data.viewport_c.x += src.x / vpc_div; + spl_scratch->scl_data.viewport_c.y += src.y / vpc_div; } + static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) { /* @@ -647,7 +679,7 @@ static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) * This may break with rotation, good thing we aren't mixing hw rotation and 3d */ if (spl_in->basic_in.mpc_combine_v) { - ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || + SPL_ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || (spl_in->basic_out.view_format != SPL_VIEW_3D_TOP_AND_BOTTOM && spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE)); if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) @@ -665,6 +697,7 @@ static void spl_clamp_viewport(struct spl_rect *viewport) if (viewport->width < MIN_VIEWPORT_SIZE) viewport->width = MIN_VIEWPORT_SIZE; } + static bool spl_dscl_is_420_format(enum spl_pixel_format format) { if (format == SPL_PIXEL_FORMAT_420BPP8 || @@ -673,6 +706,7 @@ static bool spl_dscl_is_420_format(enum spl_pixel_format format) else return false; } + static bool spl_dscl_is_video_format(enum spl_pixel_format format) { if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN @@ -681,17 +715,21 @@ static bool spl_dscl_is_video_format(enum spl_pixel_format format) else return false; } + static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, - const struct spl_scaler_data *data) + const struct spl_scaler_data *data, + bool enable_isharp, bool enable_easf) { - const long long one = dc_fixpt_one.value; + const long long one = spl_fixpt_one.value; enum spl_pixel_format pixel_format = spl_in->basic_in.format; + /* Bypass if ratio is 1:1 with no ISHARP or force scale on */ if (data->ratios.horz.value == one && data->ratios.vert.value == one && data->ratios.horz_c.value == one && data->ratios.vert_c.value == one - && !spl_in->basic_out.always_scale) + && !spl_in->basic_out.always_scale + && !enable_isharp) return SCL_MODE_SCALING_444_BYPASS; if (!spl_dscl_is_420_format(pixel_format)) { @@ -700,69 +738,196 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, else return SCL_MODE_SCALING_444_RGB_ENABLE; } - if (data->ratios.horz.value == one && data->ratios.vert.value == one) - return SCL_MODE_SCALING_420_LUMA_BYPASS; - if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) - return SCL_MODE_SCALING_420_CHROMA_BYPASS; + + /* Bypass YUV if at 1:1 with no ISHARP or if doing 2:1 YUV + * downscale without EASF + */ + if ((!enable_isharp) && (!enable_easf)) { + if (data->ratios.horz.value == one && data->ratios.vert.value == one) + return SCL_MODE_SCALING_420_LUMA_BYPASS; + if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) + return SCL_MODE_SCALING_420_CHROMA_BYPASS; + } return SCL_MODE_SCALING_420_YCBCR_ENABLE; } + +static bool spl_choose_lls_policy(enum spl_pixel_format format, + enum spl_transfer_func_type tf_type, + enum spl_transfer_func_predefined tf_predefined_type, + enum linear_light_scaling *lls_pref) +{ + if (spl_is_yuv420(format)) { + *lls_pref = LLS_PREF_NO; + if ((tf_type == SPL_TF_TYPE_PREDEFINED) || + (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) + return true; + } else { /* RGB or YUV444 */ + if ((tf_type == SPL_TF_TYPE_PREDEFINED) || + (tf_type == SPL_TF_TYPE_BYPASS)) { + *lls_pref = LLS_PREF_YES; + return true; + } + } + *lls_pref = LLS_PREF_NO; + return false; +} + +/* Enable EASF ?*/ +static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch) +{ + int vratio = 0; + int hratio = 0; + bool skip_easf = false; + bool lls_enable_easf = true; + + if (spl_in->disable_easf) + skip_easf = true; + + vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz); + + /* + * No EASF support for downscaling > 2:1 + * EASF support for upscaling or downscaling up to 2:1 + */ + if ((vratio > 2) || (hratio > 2)) + skip_easf = true; + + /* + * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer + * function to determine whether to use LINEAR or NONLINEAR scaling + */ + if (spl_in->lls_pref == LLS_PREF_DONT_CARE) + lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format, + spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, + &spl_in->lls_pref); + + if (!lls_enable_easf) + skip_easf = true; + + /* Check for linear scaling or EASF preferred */ + if (spl_in->lls_pref != LLS_PREF_YES && !spl_in->prefer_easf) + skip_easf = true; + + return skip_easf; +} + +static bool spl_get_isharp_en(struct spl_in *spl_in, + struct spl_scratch *spl_scratch) +{ + bool enable_isharp = false; + int vratio = 0; + int hratio = 0; + struct spl_taps taps = spl_scratch->scl_data.taps; + + /* Return if adaptive sharpness is disabled */ + if (spl_in->adaptive_sharpness.enable == false) + return enable_isharp; + + vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz); + + /* No iSHARP support for downscaling */ + if (vratio > 1 || hratio > 1) + return enable_isharp; + + // Scaling is up to 1:1 (no scaling) or upscaling + + /* + * Apply sharpness to all RGB surfaces and to + * NV12/P010 surfaces + */ + + /* + * Apply sharpness if supports horizontal taps 4,6 AND + * vertical taps 3, 4, 6 + */ + if ((taps.h_taps == 4 || taps.h_taps == 6) && + (taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6)) + enable_isharp = true; + + return enable_isharp; +} + /* Calculate optimal number of taps */ static bool spl_get_optimal_number_of_taps( - int max_downscale_src_width, struct spl_in *spl_in, struct spl_out *spl_out, - const struct spl_taps *in_taps) + int max_downscale_src_width, struct spl_in *spl_in, struct spl_scratch *spl_scratch, + const struct spl_taps *in_taps, bool *enable_easf_v, bool *enable_easf_h, + bool *enable_isharp) { int num_part_y, num_part_c; int max_taps_y, max_taps_c; int min_taps_y, min_taps_c; enum lb_memory_config lb_config; + bool skip_easf = false; - if (spl_out->scl_data.viewport.width > spl_out->scl_data.h_active && + if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && max_downscale_src_width != 0 && - spl_out->scl_data.viewport.width > max_downscale_src_width) + spl_scratch->scl_data.viewport.width > max_downscale_src_width) return false; + + /* Check if we are using EASF or not */ + skip_easf = enable_easf(spl_in, spl_scratch); + /* * Set default taps if none are provided * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling * taps = 4 for upscaling */ - if (in_taps->h_taps == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz) > 1) - spl_out->scl_data.taps.h_taps = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz), 8); - else - spl_out->scl_data.taps.h_taps = 4; - } else - spl_out->scl_data.taps.h_taps = in_taps->h_taps; - if (in_taps->v_taps == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 1) - spl_out->scl_data.taps.v_taps = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert, 2)), 8); - else - spl_out->scl_data.taps.v_taps = 4; - } else - spl_out->scl_data.taps.v_taps = in_taps->v_taps; - if (in_taps->v_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 1) - spl_out->scl_data.taps.v_taps_c = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert_c, 2)), 8); - else - spl_out->scl_data.taps.v_taps_c = 4; - } else - spl_out->scl_data.taps.v_taps_c = in_taps->v_taps_c; - if (in_taps->h_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c) > 1) - spl_out->scl_data.taps.h_taps_c = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c), 8); + if (skip_easf) { + if (in_taps->h_taps == 0) { + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz) > 1) + spl_scratch->scl_data.taps.h_taps = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.horz), 8); + else + spl_scratch->scl_data.taps.h_taps = 4; + } else + spl_scratch->scl_data.taps.h_taps = in_taps->h_taps; + if (in_taps->v_taps == 0) { + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1) + spl_scratch->scl_data.taps.v_taps = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( + spl_scratch->scl_data.ratios.vert, 2)), 8); + else + spl_scratch->scl_data.taps.v_taps = 4; + } else + spl_scratch->scl_data.taps.v_taps = in_taps->v_taps; + if (in_taps->v_taps_c == 0) { + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 1) + spl_scratch->scl_data.taps.v_taps_c = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( + spl_scratch->scl_data.ratios.vert_c, 2)), 8); + else + spl_scratch->scl_data.taps.v_taps_c = 4; + } else + spl_scratch->scl_data.taps.v_taps_c = in_taps->v_taps_c; + if (in_taps->h_taps_c == 0) { + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz_c) > 1) + spl_scratch->scl_data.taps.h_taps_c = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.horz_c), 8); + else + spl_scratch->scl_data.taps.h_taps_c = 4; + } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) + /* Only 1 and even h_taps_c are supported by hw */ + spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; else - spl_out->scl_data.taps.h_taps_c = 4; - } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) - /* Only 1 and even h_taps_c are supported by hw */ - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; - else - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c; + spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c; + } else { + if (spl_is_yuv420(spl_in->basic_in.format)) { + spl_scratch->scl_data.taps.h_taps = 6; + spl_scratch->scl_data.taps.v_taps = 6; + spl_scratch->scl_data.taps.h_taps_c = 4; + spl_scratch->scl_data.taps.v_taps_c = 4; + } else { /* RGB */ + spl_scratch->scl_data.taps.h_taps = 6; + spl_scratch->scl_data.taps.v_taps = 6; + spl_scratch->scl_data.taps.h_taps_c = 6; + spl_scratch->scl_data.taps.v_taps_c = 6; + } + } /*Ensure we can support the requested number of vtaps*/ - min_taps_y = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - min_taps_c = dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c); + min_taps_y = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + min_taps_c = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c); /* Use LB_MEMORY_CONFIG_3 for 4:2:0 */ if ((spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8) @@ -771,16 +936,16 @@ static bool spl_get_optimal_number_of_taps( else lb_config = LB_MEMORY_CONFIG_0; // Determine max vtap support by calculating how much line buffer can fit - spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_out->scl_data, + spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_scratch->scl_data, lb_config, &num_part_y, &num_part_c); /* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */ - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 2) - max_taps_y = num_part_y - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) - 2); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 2) + max_taps_y = num_part_y - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) - 2); else max_taps_y = num_part_y; - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 2) - max_taps_c = num_part_c - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) - 2); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 2) + max_taps_c = num_part_c - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) - 2); else max_taps_c = num_part_c; @@ -789,48 +954,108 @@ static bool spl_get_optimal_number_of_taps( else if (max_taps_c < min_taps_c) return false; - if (spl_out->scl_data.taps.v_taps > max_taps_y) - spl_out->scl_data.taps.v_taps = max_taps_y; - - if (spl_out->scl_data.taps.v_taps_c > max_taps_c) - spl_out->scl_data.taps.v_taps_c = max_taps_c; - if (spl_in->prefer_easf) { - // EASF can be enabled only for taps 3,4,6 - // If optimal no of taps is 5, then set it to 4 - // If optimal no of taps is 7 or 8, then set it to 6 - if (spl_out->scl_data.taps.v_taps == 5) - spl_out->scl_data.taps.v_taps = 4; - if (spl_out->scl_data.taps.v_taps == 7 || spl_out->scl_data.taps.v_taps == 8) - spl_out->scl_data.taps.v_taps = 6; - - if (spl_out->scl_data.taps.v_taps_c == 5) - spl_out->scl_data.taps.v_taps_c = 4; - if (spl_out->scl_data.taps.v_taps_c == 7 || spl_out->scl_data.taps.v_taps_c == 8) - spl_out->scl_data.taps.v_taps_c = 6; - - if (spl_out->scl_data.taps.h_taps == 5) - spl_out->scl_data.taps.h_taps = 4; - if (spl_out->scl_data.taps.h_taps == 7 || spl_out->scl_data.taps.h_taps == 8) - spl_out->scl_data.taps.h_taps = 6; - - if (spl_out->scl_data.taps.h_taps_c == 5) - spl_out->scl_data.taps.h_taps_c = 4; - if (spl_out->scl_data.taps.h_taps_c == 7 || spl_out->scl_data.taps.h_taps_c == 8) - spl_out->scl_data.taps.h_taps_c = 6; + if (spl_scratch->scl_data.taps.v_taps > max_taps_y) + spl_scratch->scl_data.taps.v_taps = max_taps_y; + if (spl_scratch->scl_data.taps.v_taps_c > max_taps_c) + spl_scratch->scl_data.taps.v_taps_c = max_taps_c; + + if (!skip_easf) { + /* + * RGB ( L + NL ) and Linear HDR support 6x6, 6x4, 6x3, 4x4, 4x3 + * NL YUV420 only supports 6x6, 6x4 for Y and 4x4 for UV + * + * If LB does not support 3, 4, or 6 taps, then disable EASF_V + * and only enable EASF_H. So for RGB, support 6x2, 4x2 + * and for NL YUV420, support 6x2 for Y and 4x2 for UV + * + * All other cases, have to disable EASF_V and EASF_H + * + * If optimal no of taps is 5, then set it to 4 + * If optimal no of taps is 7 or 8, then fine since max tap is 6 + * + */ + if (spl_scratch->scl_data.taps.v_taps == 5) + spl_scratch->scl_data.taps.v_taps = 4; + + if (spl_scratch->scl_data.taps.v_taps_c == 5) + spl_scratch->scl_data.taps.v_taps_c = 4; + + if (spl_scratch->scl_data.taps.h_taps == 5) + spl_scratch->scl_data.taps.h_taps = 4; + + if (spl_scratch->scl_data.taps.h_taps_c == 5) + spl_scratch->scl_data.taps.h_taps_c = 4; + + if (spl_is_yuv420(spl_in->basic_in.format)) { + if ((spl_scratch->scl_data.taps.h_taps <= 4) || + (spl_scratch->scl_data.taps.h_taps_c <= 3)) { + *enable_easf_v = false; + *enable_easf_h = false; + } else if ((spl_scratch->scl_data.taps.v_taps <= 3) || + (spl_scratch->scl_data.taps.v_taps_c <= 3)) { + *enable_easf_v = false; + *enable_easf_h = true; + } else { + *enable_easf_v = true; + *enable_easf_h = true; + } + SPL_ASSERT((spl_scratch->scl_data.taps.v_taps > 1) && + (spl_scratch->scl_data.taps.v_taps_c > 1)); + } else { /* RGB */ + if (spl_scratch->scl_data.taps.h_taps <= 3) { + *enable_easf_v = false; + *enable_easf_h = false; + } else if (spl_scratch->scl_data.taps.v_taps < 3) { + *enable_easf_v = false; + *enable_easf_h = true; + } else { + *enable_easf_v = true; + *enable_easf_h = true; + } + SPL_ASSERT(spl_scratch->scl_data.taps.v_taps > 1); + } + } else { + *enable_easf_v = false; + *enable_easf_h = false; } // end of if prefer_easf - if (!spl_in->basic_out.always_scale) { - if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz)) - spl_out->scl_data.taps.h_taps = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert)) - spl_out->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c)) - spl_out->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c)) - spl_out->scl_data.taps.v_taps_c = 1; + + /* Sharpener requires scaler to be enabled, including for 1:1 + * Check if ISHARP can be enabled + * If ISHARP is not enabled, for 1:1, set taps to 1 and disable + * EASF + * For case of 2:1 YUV where chroma is 1:1, set taps to 1 if + * EASF is not enabled + */ + + *enable_isharp = spl_get_isharp_en(spl_in, spl_scratch); + if (!*enable_isharp && !spl_in->basic_out.always_scale) { + if ((IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)) && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) { + spl_scratch->scl_data.taps.h_taps = 1; + spl_scratch->scl_data.taps.v_taps = 1; + + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)) + spl_scratch->scl_data.taps.h_taps_c = 1; + + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)) + spl_scratch->scl_data.taps.v_taps_c = 1; + + *enable_easf_v = false; + *enable_easf_h = false; + } else { + if ((!*enable_easf_h) && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c))) + spl_scratch->scl_data.taps.h_taps_c = 1; + + if ((!*enable_easf_v) && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))) + spl_scratch->scl_data.taps.v_taps_c = 1; + } } return true; } + static void spl_set_black_color_data(enum spl_pixel_format format, struct scl_black_color *scl_black_color) { @@ -848,38 +1073,38 @@ static void spl_set_black_color_data(enum spl_pixel_format format, static void spl_set_manual_ratio_init_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *scl_data) { - struct fixed31_32 bot; + struct spl_fixed31_32 bot; - dscl_prog_data->ratios.h_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.horz) << 5; - dscl_prog_data->ratios.v_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.vert) << 5; - dscl_prog_data->ratios.h_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.horz_c) << 5; - dscl_prog_data->ratios.v_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.vert_c) << 5; + dscl_prog_data->ratios.h_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.horz) << 5; + dscl_prog_data->ratios.v_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.vert) << 5; + dscl_prog_data->ratios.h_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.horz_c) << 5; + dscl_prog_data->ratios.v_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.vert_c) << 5; /* * 0.24 format for fraction, first five bits zeroed */ dscl_prog_data->init.h_filter_init_frac = - dc_fixpt_u0d19(scl_data->inits.h) << 5; + spl_fixpt_u0d19(scl_data->inits.h) << 5; dscl_prog_data->init.h_filter_init_int = - dc_fixpt_floor(scl_data->inits.h); + spl_fixpt_floor(scl_data->inits.h); dscl_prog_data->init.h_filter_init_frac_c = - dc_fixpt_u0d19(scl_data->inits.h_c) << 5; + spl_fixpt_u0d19(scl_data->inits.h_c) << 5; dscl_prog_data->init.h_filter_init_int_c = - dc_fixpt_floor(scl_data->inits.h_c); + spl_fixpt_floor(scl_data->inits.h_c); dscl_prog_data->init.v_filter_init_frac = - dc_fixpt_u0d19(scl_data->inits.v) << 5; + spl_fixpt_u0d19(scl_data->inits.v) << 5; dscl_prog_data->init.v_filter_init_int = - dc_fixpt_floor(scl_data->inits.v); + spl_fixpt_floor(scl_data->inits.v); dscl_prog_data->init.v_filter_init_frac_c = - dc_fixpt_u0d19(scl_data->inits.v_c) << 5; + spl_fixpt_u0d19(scl_data->inits.v_c) << 5; dscl_prog_data->init.v_filter_init_int_c = - dc_fixpt_floor(scl_data->inits.v_c); - - bot = dc_fixpt_add(scl_data->inits.v, scl_data->ratios.vert); - dscl_prog_data->init.v_filter_init_bot_frac = dc_fixpt_u0d19(bot) << 5; - dscl_prog_data->init.v_filter_init_bot_int = dc_fixpt_floor(bot); - bot = dc_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c); - dscl_prog_data->init.v_filter_init_bot_frac_c = dc_fixpt_u0d19(bot) << 5; - dscl_prog_data->init.v_filter_init_bot_int_c = dc_fixpt_floor(bot); + spl_fixpt_floor(scl_data->inits.v_c); + + bot = spl_fixpt_add(scl_data->inits.v, scl_data->ratios.vert); + dscl_prog_data->init.v_filter_init_bot_frac = spl_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int = spl_fixpt_floor(bot); + bot = spl_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c); + dscl_prog_data->init.v_filter_init_bot_frac_c = spl_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int_c = spl_fixpt_floor(bot); } static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, @@ -890,79 +1115,28 @@ static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->taps.v_taps_c = scl_data->taps.v_taps_c - 1; dscl_prog_data->taps.h_taps_c = scl_data->taps.h_taps_c - 1; } -static const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) -{ - if (taps == 8) - return spl_get_filter_8tap_64p(ratio); - else if (taps == 7) - return spl_get_filter_7tap_64p(ratio); - else if (taps == 6) - return spl_get_filter_6tap_64p(ratio); - else if (taps == 5) - return spl_get_filter_5tap_64p(ratio); - else if (taps == 4) - return spl_get_filter_4tap_64p(ratio); - else if (taps == 3) - return spl_get_filter_3tap_64p(ratio); - else if (taps == 2) - return spl_get_filter_2tap_64p(); - else if (taps == 1) - return NULL; - else { - /* should never happen, bug */ - BREAK_TO_DEBUGGER(); - return NULL; - } -} -static void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) -{ - dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps, data->ratios.horz); - dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps, data->ratios.vert); - dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps_c, data->ratios.horz_c); - dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps_c, data->ratios.vert_c); -} -#ifdef CONFIG_DRM_AMD_DC_FP -static const uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) -{ - if ((taps == 3) || (taps == 4) || (taps == 6)) - return spl_get_filter_isharp_bs_4tap_64p(); - else { - /* should never happen, bug */ - BREAK_TO_DEBUGGER(); - return NULL; - } -} -static void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) -{ - dscl_prog_data->filter_blur_scale_h = spl_dscl_get_blur_scale_coeffs_64p( - data->taps.h_taps); - dscl_prog_data->filter_blur_scale_v = spl_dscl_get_blur_scale_coeffs_64p( - data->taps.v_taps); -} -#endif + /* Populate dscl prog data structure from scaler data calculated by SPL */ -static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *spl_scratch, + struct spl_out *spl_out, bool enable_easf_v, bool enable_easf_h, bool enable_isharp) { struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; - const struct spl_scaler_data *data = &spl_out->scl_data; + const struct spl_scaler_data *data = &spl_scratch->scl_data; struct scl_black_color *scl_black_color = &dscl_prog_data->scl_black_color; + bool enable_easf = enable_easf_v || enable_easf_h; + // Set values for recout - dscl_prog_data->recout = spl_out->scl_data.recout; + dscl_prog_data->recout = spl_scratch->scl_data.recout; // Set values for MPC Size - dscl_prog_data->mpc_size.width = spl_out->scl_data.h_active; - dscl_prog_data->mpc_size.height = spl_out->scl_data.v_active; + dscl_prog_data->mpc_size.width = spl_scratch->scl_data.h_active; + dscl_prog_data->mpc_size.height = spl_scratch->scl_data.v_active; // SCL_MODE - Set SCL_MODE data - dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data); + dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data, enable_isharp, + enable_easf); // SCL_BLACK_COLOR spl_set_black_color_data(spl_in->basic_in.format, scl_black_color); @@ -973,103 +1147,135 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_ou // Set HTaps/VTaps spl_set_taps_data(dscl_prog_data, data); // Set viewport - dscl_prog_data->viewport = spl_out->scl_data.viewport; + dscl_prog_data->viewport = spl_scratch->scl_data.viewport; // Set viewport_c - dscl_prog_data->viewport_c = spl_out->scl_data.viewport_c; + dscl_prog_data->viewport_c = spl_scratch->scl_data.viewport_c; // Set filters data - spl_set_filters_data(dscl_prog_data, data); + spl_set_filters_data(dscl_prog_data, data, enable_easf_v, enable_easf_h); } -/* Enable EASF ?*/ -static bool enable_easf(int scale_ratio, int taps, - enum linear_light_scaling lls_pref, bool prefer_easf) + +/* Calculate C0-C3 coefficients based on HDR_mult */ +static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t hdr_multx100) { - // Is downscaling > 6:1 ? - if (scale_ratio > 6) { - // END - No EASF support for downscaling > 6:1 - return false; - } - // Is upscaling or downscaling up to 2:1? - if (scale_ratio <= 2) { - // Is linear scaling or EASF preferred? - if (lls_pref == LLS_PREF_YES || prefer_easf) { - // LB support taps 3, 4, 6 - if (taps == 3 || taps == 4 || taps == 6) { - // END - EASF supported - return true; - } - } - } - // END - EASF not supported - return false; + struct spl_fixed31_32 hdr_mult, c0_mult, c1_mult, c2_mult; + struct spl_fixed31_32 c0_calc, c1_calc, c2_calc; + struct spl_custom_float_format fmt; + + SPL_ASSERT(hdr_multx100); + hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100, 100LL); + c0_mult = spl_fixpt_from_fraction(2126LL, 10000LL); + c1_mult = spl_fixpt_from_fraction(7152LL, 10000LL); + c2_mult = spl_fixpt_from_fraction(722LL, 10000LL); + + c0_calc = spl_fixpt_mul(hdr_mult, spl_fixpt_mul(c0_mult, spl_fixpt_from_fraction( + 16384LL, 125LL))); + c1_calc = spl_fixpt_mul(hdr_mult, spl_fixpt_mul(c1_mult, spl_fixpt_from_fraction( + 16384LL, 125LL))); + c2_calc = spl_fixpt_mul(hdr_mult, spl_fixpt_mul(c2_mult, spl_fixpt_from_fraction( + 16384LL, 125LL))); + + fmt.exponenta_bits = 5; + fmt.mantissa_bits = 10; + fmt.sign = true; + + // fp1.5.10, C0 coefficient (LN_rec709: HDR_MULT * 0.212600 * 2^14/125) + spl_convert_to_custom_float_format(c0_calc, &fmt, &dscl_prog_data->easf_matrix_c0); + // fp1.5.10, C1 coefficient (LN_rec709: HDR_MULT * 0.715200 * 2^14/125) + spl_convert_to_custom_float_format(c1_calc, &fmt, &dscl_prog_data->easf_matrix_c1); + // fp1.5.10, C2 coefficient (LN_rec709: HDR_MULT * 0.072200 * 2^14/125) + spl_convert_to_custom_float_format(c2_calc, &fmt, &dscl_prog_data->easf_matrix_c2); + dscl_prog_data->easf_matrix_c3 = 0x0; // fp1.5.10, C3 coefficient } + /* Set EASF data */ -static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, - bool enable_easf_v, bool enable_easf_h, enum linear_light_scaling lls_pref, - enum spl_pixel_format format) +static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v, + bool enable_easf_h, enum linear_light_scaling lls_pref, + enum spl_pixel_format format, enum system_setup setup, + uint32_t hdr_multx100) { - if (spl_is_yuv420(format)) /* TODO: 0 = RGB, 1 = YUV */ - dscl_prog_data->easf_matrix_mode = 1; - else - dscl_prog_data->easf_matrix_mode = 0; - + struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; if (enable_easf_v) { dscl_prog_data->easf_v_en = true; dscl_prog_data->easf_v_ring = 0; - dscl_prog_data->easf_v_sharp_factor = 1; + dscl_prog_data->easf_v_sharp_factor = 0; dscl_prog_data->easf_v_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_v_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - dscl_prog_data->easf_v_bf3_mode = 2; // 2-bit, BF3 chroma mode correction calculation mode - dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control + /* 2-bit, BF3 chroma mode correction calculation mode */ + dscl_prog_data->easf_v_bf3_mode = spl_get_v_bf3_mode( + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ minCoef ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_uptilt = - 0x9F00;// FP1.5.10 [minCoef] (-0.036109167214271) + spl_get_3tap_dntilt_uptilt_offset(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTiltMaxVal ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt_max = - 0x24FE; // FP1.5.10 [upTiltMaxVal] ( 0.904556445553545) + spl_get_3tap_uptilt_maxval(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ dnTiltSlope ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_slope = - 0x3940; // FP1.5.10 [dnTiltSlope] ( 0.910488988173371) + spl_get_3tap_dntilt_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt1Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt1_slope = - 0x359C; // FP1.5.10 [upTilt1Slope] ( 0.125620179040899) + spl_get_3tap_uptilt1_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt2Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_slope = - 0x359C; // FP1.5.10 [upTilt2Slope] ( 0.006786817723568) + spl_get_3tap_uptilt2_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt2Offset ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_offset = - 0x9F00; // FP1.5.10 [upTilt2Offset] (-0.006139059716651) + spl_get_3tap_uptilt2_offset(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_v_ringest_eventap_reduceg1 = - 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] + spl_get_reducer_gain4(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_v_ringest_eventap_reduceg2 = - 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] + spl_get_reducer_gain6(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain1 = - 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 + spl_get_gainRing4(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain2 = - 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 + spl_get_gainRing6(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); dscl_prog_data->easf_v_bf_maxa = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_maxb = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 1 dscl_prog_data->easf_v_bf_mina = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_minb = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { + dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control + + dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 + dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 + dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 + dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 + dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 + dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 + dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1090,13 +1296,41 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } else { + dscl_prog_data->easf_v_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_v_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_v_bf2_roc_gain = 14; // U2.2, Rate Of Change control + + dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 + dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 + dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 + dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 + dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 + dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 + dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1115,11 +1349,11 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } } else dscl_prog_data->easf_v_en = false; @@ -1127,52 +1361,63 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, if (enable_easf_h) { dscl_prog_data->easf_h_en = true; dscl_prog_data->easf_h_ring = 0; - dscl_prog_data->easf_h_sharp_factor = 1; + dscl_prog_data->easf_h_sharp_factor = 0; dscl_prog_data->easf_h_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_h_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - dscl_prog_data->easf_h_bf3_mode = - 2; // 2-bit, BF3 chroma mode correction calculation mode - dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control + /* 2-bit, BF3 chroma mode correction calculation mode */ + dscl_prog_data->easf_h_bf3_mode = spl_get_h_bf3_mode( + spl_scratch->scl_data.recip_ratios.horz); + /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_h_ringest_eventap_reduceg1 = - 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] + spl_get_reducer_gain4(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); + /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_h_ringest_eventap_reduceg2 = - 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] + spl_get_reducer_gain6(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); + /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain1 = - 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 + spl_get_gainRing4(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); + /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain2 = - 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 + spl_get_gainRing6(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); dscl_prog_data->easf_h_bf_maxa = 63; //Horz Max BF value A in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_maxb = 63; //Horz Max BF value B in U0.6 format.Selected if H_FCNTL==1 dscl_prog_data->easf_h_bf_mina = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_minb = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==1 - dscl_prog_data->easf_h_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { + dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control + + dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 + dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 + dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 + dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 + dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 + dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 + dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1190,12 +1435,40 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } else { + dscl_prog_data->easf_h_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_h_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_h_bf2_roc_gain = 14; // U2.2, Rate Of Change control + + dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 + dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 + dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 + dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 + dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 + dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 + dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1213,25 +1486,30 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - dscl_prog_data->easf_h_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } // if (lls_pref == LLS_PREF_YES) } else dscl_prog_data->easf_h_en = false; if (lls_pref == LLS_PREF_YES) { dscl_prog_data->easf_ltonl_en = 1; // Linear input - dscl_prog_data->easf_matrix_c0 = - 0x504E; // fp1.5.10, C0 coefficient (LN_BT2020: 0.2627 * (2^14)/125 = 34.43750000) - dscl_prog_data->easf_matrix_c1 = - 0x558E; // fp1.5.10, C1 coefficient (LN_BT2020: 0.6780 * (2^14)/125 = 88.87500000) - dscl_prog_data->easf_matrix_c2 = - 0x47C6; // fp1.5.10, C2 coefficient (LN_BT2020: 0.0593 * (2^14)/125 = 7.77343750) - dscl_prog_data->easf_matrix_c3 = - 0x0; // fp1.5.10, C3 coefficient + if ((setup == HDR_L) && (spl_is_rgb8(format))) { + /* Calculate C0-C3 coefficients based on HDR multiplier */ + spl_calculate_c0_c3_hdr(dscl_prog_data, hdr_multx100); + } else { // HDR_L ( DWM ) and SDR_L + dscl_prog_data->easf_matrix_c0 = + 0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720) + dscl_prog_data->easf_matrix_c1 = + 0x55DC; // fp1.5.10, C1 coefficient (LN_rec709: 0.7152 * (2^14)/125 = 93.74269440) + dscl_prog_data->easf_matrix_c2 = + 0x48BB; // fp1.5.10, C2 coefficient (LN_rec709: 0.0722 * (2^14)/125 = 9.46339840) + dscl_prog_data->easf_matrix_c3 = + 0x0; // fp1.5.10, C3 coefficient + } } else { dscl_prog_data->easf_ltonl_en = 0; // Non-Linear input dscl_prog_data->easf_matrix_c0 = @@ -1243,27 +1521,43 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_matrix_c3 = 0x0; // fp1.5.10, C3 coefficient } + + if (spl_is_yuv420(format)) { /* TODO: 0 = RGB, 1 = YUV */ + dscl_prog_data->easf_matrix_mode = 1; + /* + * 2-bit, BF3 chroma mode correction calculation mode + * Needs to be disabled for YUV420 mode + * Override lookup value + */ + dscl_prog_data->easf_v_bf3_mode = 0; + dscl_prog_data->easf_h_bf3_mode = 0; + } else + dscl_prog_data->easf_matrix_mode = 0; + } + /*Set isharp noise detection */ -static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data) +static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data) { // ISHARP_NOISEDET_MODE // 0: 3x5 as VxH // 1: 4x5 as VxH // 2: // 3: 5x5 as VxH - if (dscl_prog_data->taps.v_taps == 6) - dscl_prog_data->isharp_noise_det.mode = 3; // ISHARP_NOISEDET_MODE - else if (dscl_prog_data->taps.h_taps == 4) - dscl_prog_data->isharp_noise_det.mode = 1; // ISHARP_NOISEDET_MODE - else if (dscl_prog_data->taps.h_taps == 3) - dscl_prog_data->isharp_noise_det.mode = 0; // ISHARP_NOISEDET_MODE + if (data->taps.v_taps == 6) + dscl_prog_data->isharp_noise_det.mode = 3; + else if (data->taps.v_taps == 4) + dscl_prog_data->isharp_noise_det.mode = 1; + else if (data->taps.v_taps == 3) + dscl_prog_data->isharp_noise_det.mode = 0; }; /* Set Sharpener data */ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, struct adaptive_sharpness adp_sharpness, bool enable_isharp, enum linear_light_scaling lls_pref, enum spl_pixel_format format, - const struct spl_scaler_data *data) + const struct spl_scaler_data *data, struct spl_fixed31_32 ratio, + enum system_setup setup) { /* Turn off sharpener if not required */ if (!enable_isharp) { @@ -1272,10 +1566,12 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, } dscl_prog_data->isharp_en = 1; // ISHARP_EN - dscl_prog_data->isharp_noise_det.enable = 1; // ISHARP_NOISEDET_EN // Set ISHARP_NOISEDET_MODE if htaps = 6-tap - if (dscl_prog_data->taps.h_taps == 6) - spl_set_isharp_noise_det_mode(dscl_prog_data); // ISHARP_NOISEDET_MODE + if (data->taps.h_taps == 6) { + dscl_prog_data->isharp_noise_det.enable = 1; /* ISHARP_NOISEDET_EN */ + spl_set_isharp_noise_det_mode(dscl_prog_data, data); /* ISHARP_NOISEDET_MODE */ + } else + dscl_prog_data->isharp_noise_det.enable = 0; // ISHARP_NOISEDET_EN // Program noise detection threshold dscl_prog_data->isharp_noise_det.uthreshold = 24; // ISHARP_NOISEDET_UTHRE dscl_prog_data->isharp_noise_det.dthreshold = 4; // ISHARP_NOISEDET_DTHRE @@ -1284,50 +1580,93 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->isharp_noise_det.pwl_end_in = 13; // ISHARP_NOISEDET_PWL_END_IN dscl_prog_data->isharp_noise_det.pwl_slope = 1623; // ISHARP_NOISEDET_PWL_SLOPE - if ((lls_pref == LLS_PREF_NO) && !spl_is_yuv420(format)) /* ISHARP_FMT_MODE */ + if (lls_pref == LLS_PREF_NO) /* ISHARP_FMT_MODE */ dscl_prog_data->isharp_fmt.mode = 1; else dscl_prog_data->isharp_fmt.mode = 0; dscl_prog_data->isharp_fmt.norm = 0x3C00; // ISHARP_FMT_NORM dscl_prog_data->isharp_lba.mode = 0; // ISHARP_LBA_MODE - // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 - dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 - dscl_prog_data->isharp_lba.in_seg[1] = 256; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 - dscl_prog_data->isharp_lba.in_seg[2] = 614; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[2] = -20; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 - dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 - dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 - dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format - switch (adp_sharpness.sharpness) { - case SHARPNESS_LOW: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_0p5x(); - break; - case SHARPNESS_MID: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_1p0x(); - break; - case SHARPNESS_HIGH: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_2p0x(); - break; - default: - BREAK_TO_DEBUGGER(); + + if (setup == SDR_L) { + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 62; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 130; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 450; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = 0x18D; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -115 + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 520; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 520; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 520; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format + } else if (setup == HDR_L) { + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 254; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 559; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = 0x10C; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -244 + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 592; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format + } else { + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 40; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 204; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 818; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = 0x1D9; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -39 + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format } + + spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness); + dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup); + dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level; + // Program the nldelta soft clip values if (lls_pref == LLS_PREF_YES) { dscl_prog_data->isharp_nldelta_sclip.enable_p = 0; /* ISHARP_NLDELTA_SCLIP_EN_P */ @@ -1346,62 +1685,7 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, } // Set the values as per lookup table -#ifdef CONFIG_DRM_AMD_DC_FP spl_set_blur_scale_data(dscl_prog_data, data); -#endif -} -static bool spl_get_isharp_en(struct adaptive_sharpness adp_sharpness, - int vscale_ratio, int hscale_ratio, struct spl_taps taps, - enum spl_pixel_format format) -{ - bool enable_isharp = false; - - if (adp_sharpness.enable == false) - return enable_isharp; // Return if adaptive sharpness is disabled - // Is downscaling ? - if (vscale_ratio > 1 || hscale_ratio > 1) { - // END - No iSHARP support for downscaling - return enable_isharp; - } - // Scaling is up to 1:1 (no scaling) or upscaling - - /* Only apply sharpness to NV12 and not P010 */ - if (format != SPL_PIXEL_FORMAT_420BPP8) - return enable_isharp; - - // LB support horizontal taps 4,6 or vertical taps 3, 4, 6 - if (taps.h_taps == 4 || taps.h_taps == 6 || - taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6) { - // END - iSHARP supported - enable_isharp = true; - } - return enable_isharp; -} - -static bool spl_choose_lls_policy(enum spl_pixel_format format, - enum spl_transfer_func_type tf_type, - enum spl_transfer_func_predefined tf_predefined_type, - enum linear_light_scaling *lls_pref) -{ - if (spl_is_yuv420(format)) { - *lls_pref = LLS_PREF_NO; - if ((tf_type == SPL_TF_TYPE_PREDEFINED) || (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) - return true; - } else { /* RGB or YUV444 */ - if (tf_type == SPL_TF_TYPE_PREDEFINED) { - if ((tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG) || - (tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG12)) - *lls_pref = LLS_PREF_NO; - else - *lls_pref = LLS_PREF_YES; - return true; - } else if (tf_type == SPL_TF_TYPE_BYPASS) { - *lls_pref = LLS_PREF_YES; - return true; - } - } - *lls_pref = LLS_PREF_NO; - return false; } /* Calculate scaler parameters */ @@ -1410,67 +1694,74 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool res = false; bool enable_easf_v = false; bool enable_easf_h = false; - bool lls_enable_easf = true; int vratio = 0; int hratio = 0; - const struct spl_scaler_data *data = &spl_out->scl_data; + struct spl_scratch spl_scratch; + struct spl_fixed31_32 isharp_scale_ratio; + enum system_setup setup; + bool enable_isharp = false; + const struct spl_scaler_data *data = &spl_scratch.scl_data; + + memset(&spl_scratch, 0, sizeof(struct spl_scratch)); + spl_scratch.scl_data.h_active = spl_in->h_active; + spl_scratch.scl_data.v_active = spl_in->v_active; + // All SPL calls /* recout calculation */ /* depends on h_active */ - spl_calculate_recout(spl_in, spl_out); + spl_calculate_recout(spl_in, &spl_scratch, spl_out); /* depends on pixel format */ - spl_calculate_scaling_ratios(spl_in, spl_out); + spl_calculate_scaling_ratios(spl_in, &spl_scratch, spl_out); /* depends on scaling ratios and recout, does not calculate offset yet */ - spl_calculate_viewport_size(spl_in, spl_out); + spl_calculate_viewport_size(spl_in, &spl_scratch); res = spl_get_optimal_number_of_taps( spl_in->basic_out.max_downscale_src_width, spl_in, - spl_out, &spl_in->scaling_quality); + &spl_scratch, &spl_in->scaling_quality, &enable_easf_v, + &enable_easf_h, &enable_isharp); /* * Depends on recout, scaling ratios, h_active and taps * May need to re-check lb size after this in some obscure scenario */ if (res) - spl_calculate_inits_and_viewports(spl_in, spl_out); + spl_calculate_inits_and_viewports(spl_in, &spl_scratch); // Handle 3d recout - spl_handle_3d_recout(spl_in, &spl_out->scl_data.recout); + spl_handle_3d_recout(spl_in, &spl_scratch.scl_data.recout); // Clamp - spl_clamp_viewport(&spl_out->scl_data.viewport); + spl_clamp_viewport(&spl_scratch.scl_data.viewport); if (!res) return res; - /* - * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer - * function to determine whether to use LINEAR or NONLINEAR scaling - */ - if (spl_in->lls_pref == LLS_PREF_DONT_CARE) - lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format, - spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, - &spl_in->lls_pref); - // Save all calculated parameters in dscl_prog_data structure to program hw registers - spl_set_dscl_prog_data(spl_in, spl_out); + spl_set_dscl_prog_data(spl_in, &spl_scratch, spl_out, enable_easf_v, enable_easf_h, enable_isharp); - vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); - if (!lls_enable_easf || spl_in->disable_easf) { - enable_easf_v = false; - enable_easf_h = false; + if (spl_in->lls_pref == LLS_PREF_YES) { + if (spl_in->is_hdr_on) + setup = HDR_L; + else + setup = SDR_L; } else { - /* Enable EASF on vertical? */ - enable_easf_v = enable_easf(vratio, spl_out->scl_data.taps.v_taps, spl_in->lls_pref, spl_in->prefer_easf); - /* Enable EASF on horizontal? */ - enable_easf_h = enable_easf(hratio, spl_out->scl_data.taps.h_taps, spl_in->lls_pref, spl_in->prefer_easf); + if (spl_in->is_hdr_on) + setup = HDR_NL; + else + setup = SDR_NL; } + // Set EASF - spl_set_easf_data(spl_out->dscl_prog_data, enable_easf_v, enable_easf_h, spl_in->lls_pref, - spl_in->basic_in.format); + spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, + spl_in->basic_in.format, setup, spl_in->hdr_multx100); + // Set iSHARP - bool enable_isharp = spl_get_isharp_en(spl_in->adaptive_sharpness, vratio, hratio, - spl_out->scl_data.taps, spl_in->basic_in.format); + vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.horz); + if (vratio <= hratio) + isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.vert; + else + isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.horz; + spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp, - spl_in->lls_pref, spl_in->basic_in.format, data); + spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup); return res; } diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c new file mode 100644 index 000000000000..99238644e0a1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dc_spl_filters.h" + +void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, + uint16_t *s1_12_filter, int num_taps) +{ + int num_entries = NUM_PHASES_COEFF * num_taps; + int i; + + for (i = 0; i < num_entries; i++) + *(s1_12_filter + i) = *(s1_10_filter + i) * 4; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h new file mode 100644 index 000000000000..20439cdbdb10 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ + +#ifndef __DC_SPL_FILTERS_H__ +#define __DC_SPL_FILTERS_H__ + +#include "dc_spl_types.h" + +#define NUM_PHASES_COEFF 33 + +void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, + uint16_t *s1_12_filter, int num_taps); + +#endif /* __DC_SPL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index a5d9a6223d06..33712f50d303 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -2,6 +2,8 @@ // // Copyright 2024 Advanced Micro Devices, Inc. +#include "spl_debug.h" +#include "dc_spl_filters.h" #include "dc_spl_isharp_filters.h" //======================================== @@ -15,7 +17,7 @@ // C_start = 40.000000 // C_end = 64.000000 //======================================== -static const uint32_t filter_isharp_1D_lut_0[32] = { +static const uint32_t filter_isharp_1D_lut_0[ISHARP_LUT_TABLE_SIZE] = { 0x02010000, 0x0A070503, 0x1614100D, @@ -61,7 +63,7 @@ static const uint32_t filter_isharp_1D_lut_0[32] = { // C_end = 127.000000 //======================================== -static const uint32_t filter_isharp_1D_lut_0p5x[32] = { +static const uint32_t filter_isharp_1D_lut_0p5x[ISHARP_LUT_TABLE_SIZE] = { 0x00000000, 0x02020101, 0x06050403, @@ -106,7 +108,7 @@ static const uint32_t filter_isharp_1D_lut_0p5x[32] = { // C_start = 96.000000 // C_end = 127.000000 //======================================== -static const uint32_t filter_isharp_1D_lut_1p0x[32] = { +static const uint32_t filter_isharp_1D_lut_1p0x[ISHARP_LUT_TABLE_SIZE] = { 0x01000000, 0x05040302, 0x0B0A0806, @@ -151,7 +153,7 @@ static const uint32_t filter_isharp_1D_lut_1p0x[32] = { // C_start = 96.000000 // C_end = 127.000000 //======================================== -static const uint32_t filter_isharp_1D_lut_1p5x[32] = { +static const uint32_t filter_isharp_1D_lut_1p5x[ISHARP_LUT_TABLE_SIZE] = { 0x01010000, 0x07050402, 0x110F0C0A, @@ -196,7 +198,7 @@ static const uint32_t filter_isharp_1D_lut_1p5x[32] = { // C_start = 40.000000 // C_end = 127.000000 //======================================== -static const uint32_t filter_isharp_1D_lut_2p0x[32] = { +static const uint32_t filter_isharp_1D_lut_2p0x[ISHARP_LUT_TABLE_SIZE] = { 0x02010000, 0x0A070503, 0x1614100D, @@ -230,6 +232,53 @@ static const uint32_t filter_isharp_1D_lut_2p0x[32] = { 0x080B0D0E, 0x00020406, }; +//======================================== +// Delta Gain 1DLUT +// LUT content is packed as 4-bytes into one DWORD/entry +// A_start = 0.000000 +// A_end = 10.000000 +// A_gain = 3.000000 +// B_start = 11.000000 +// B_end = 127.000000 +// C_start = 40.000000 +// C_end = 127.000000 +//======================================== +static const uint32_t filter_isharp_1D_lut_3p0x[ISHARP_LUT_TABLE_SIZE] = { +0x03010000, +0x0F0B0805, +0x211E1813, +0x2B292624, +0x3533302E, +0x3E3C3A37, +0x46444240, +0x4D4B4A48, +0x5352504F, +0x59575655, +0x5D5C5B5A, +0x61605F5E, +0x64646362, +0x66666565, +0x68686767, +0x68686868, +0x68686868, +0x67676868, +0x65656666, +0x62636464, +0x5E5F6061, +0x5A5B5C5D, +0x55565759, +0x4F505253, +0x484A4B4D, +0x40424446, +0x373A3C3E, +0x2E303335, +0x2426292B, +0x191B1E21, +0x0D101316, +0x0003060A, +}; + +//======================================== // Wide scaler coefficients //======================================================== // <using> gen_scaler_coeffs.m @@ -284,7 +333,7 @@ static const uint16_t filter_isharp_wide_6tap_64p[198] = { // <CoefType> Blur & Scale LPF // <CoefQuant> S1.10 //======================================================== -static const uint16_t filter_isharp_bs_4tap_64p[198] = { +static const uint16_t filter_isharp_bs_4tap_in_6_64p[198] = { 0x0000, 0x00E5, 0x0237, 0x00E4, 0x0000, 0x0000, 0x0000, 0x00DE, 0x0237, 0x00EB, 0x0000, 0x0000, 0x0000, 0x00D7, 0x0236, 0x00F2, 0x0001, 0x0000, @@ -319,6 +368,138 @@ static const uint16_t filter_isharp_bs_4tap_64p[198] = { 0x0000, 0x003B, 0x01CF, 0x01C2, 0x0034, 0x0000, 0x0000, 0x0037, 0x01C9, 0x01C9, 0x0037, 0x0000 }; +//======================================================== +// <using> gen_BlurScale_coeffs.m +// <date> 25-Apr-2022 +// <num_taps> 4 +// <num_phases> 64 +// <CoefType> Blur & Scale LPF +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t filter_isharp_bs_4tap_64p[132] = { +0x00E5, 0x0237, 0x00E4, 0x0000, +0x00DE, 0x0237, 0x00EB, 0x0000, +0x00D7, 0x0236, 0x00F2, 0x0001, +0x00D0, 0x0235, 0x00FA, 0x0001, +0x00C9, 0x0234, 0x0101, 0x0002, +0x00C2, 0x0233, 0x0108, 0x0003, +0x00BB, 0x0232, 0x0110, 0x0003, +0x00B5, 0x0230, 0x0117, 0x0004, +0x00AE, 0x022E, 0x011F, 0x0005, +0x00A8, 0x022C, 0x0126, 0x0006, +0x00A2, 0x022A, 0x012D, 0x0007, +0x009C, 0x0228, 0x0134, 0x0008, +0x0096, 0x0225, 0x013C, 0x0009, +0x0090, 0x0222, 0x0143, 0x000B, +0x008A, 0x021F, 0x014B, 0x000C, +0x0085, 0x021C, 0x0151, 0x000E, +0x007F, 0x0218, 0x015A, 0x000F, +0x007A, 0x0215, 0x0160, 0x0011, +0x0074, 0x0211, 0x0168, 0x0013, +0x006F, 0x020D, 0x016F, 0x0015, +0x006A, 0x0209, 0x0176, 0x0017, +0x0065, 0x0204, 0x017E, 0x0019, +0x0060, 0x0200, 0x0185, 0x001B, +0x005C, 0x01FB, 0x018C, 0x001D, +0x0057, 0x01F6, 0x0193, 0x0020, +0x0053, 0x01F1, 0x019A, 0x0022, +0x004E, 0x01EC, 0x01A1, 0x0025, +0x004A, 0x01E6, 0x01A8, 0x0028, +0x0046, 0x01E1, 0x01AF, 0x002A, +0x0042, 0x01DB, 0x01B6, 0x002D, +0x003F, 0x01D5, 0x01BB, 0x0031, +0x003B, 0x01CF, 0x01C2, 0x0034, +0x0037, 0x01C9, 0x01C9, 0x0037, +}; +//======================================================== +// <using> gen_BlurScale_coeffs.m +// <date> 09-Jun-2022 +// <num_taps> 3 +// <num_phases> 64 +// <CoefType> Blur & Scale LPF +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t filter_isharp_bs_3tap_64p[99] = { +0x0200, 0x0200, 0x0000, +0x01F6, 0x0206, 0x0004, +0x01EC, 0x020B, 0x0009, +0x01E2, 0x0211, 0x000D, +0x01D8, 0x0216, 0x0012, +0x01CE, 0x021C, 0x0016, +0x01C4, 0x0221, 0x001B, +0x01BA, 0x0226, 0x0020, +0x01B0, 0x022A, 0x0026, +0x01A6, 0x022F, 0x002B, +0x019C, 0x0233, 0x0031, +0x0192, 0x0238, 0x0036, +0x0188, 0x023C, 0x003C, +0x017E, 0x0240, 0x0042, +0x0174, 0x0244, 0x0048, +0x016A, 0x0248, 0x004E, +0x0161, 0x024A, 0x0055, +0x0157, 0x024E, 0x005B, +0x014D, 0x0251, 0x0062, +0x0144, 0x0253, 0x0069, +0x013A, 0x0256, 0x0070, +0x0131, 0x0258, 0x0077, +0x0127, 0x025B, 0x007E, +0x011E, 0x025C, 0x0086, +0x0115, 0x025E, 0x008D, +0x010B, 0x0260, 0x0095, +0x0102, 0x0262, 0x009C, +0x00F9, 0x0263, 0x00A4, +0x00F0, 0x0264, 0x00AC, +0x00E7, 0x0265, 0x00B4, +0x00DF, 0x0264, 0x00BD, +0x00D6, 0x0265, 0x00C5, +0x00CD, 0x0266, 0x00CD, +}; + +/* Converted Blur & Scale coeff tables from S1.10 to S1.12 */ +static uint16_t filter_isharp_bs_4tap_in_6_64p_s1_12[198]; +static uint16_t filter_isharp_bs_4tap_64p_s1_12[132]; +static uint16_t filter_isharp_bs_3tap_64p_s1_12[99]; + +/* Pre-generated 1DLUT for given setup and sharpness level */ +struct isharp_1D_lut_pregen filter_isharp_1D_lut_pregen[NUM_SHARPNESS_SETUPS] = { + { + 0, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + } + }, + { + 0, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + } + }, + { + 0, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + } + }, + { + 0, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + } + }, +}; + const uint32_t *spl_get_filter_isharp_1D_lut_0(void) { return filter_isharp_1D_lut_0; @@ -339,11 +520,165 @@ const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void) { return filter_isharp_1D_lut_2p0x; } +const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void) +{ + return filter_isharp_1D_lut_3p0x; +} const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void) { return filter_isharp_wide_6tap_64p; } -const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) +uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void) { - return filter_isharp_bs_4tap_64p; + return filter_isharp_bs_4tap_in_6_64p_s1_12; } +uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) +{ + return filter_isharp_bs_4tap_64p_s1_12; +} +uint16_t *spl_get_filter_isharp_bs_3tap_64p(void) +{ + return filter_isharp_bs_3tap_64p_s1_12; +} + +static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level, enum system_setup setup, + struct spl_sharpness_range sharpness_range) +{ + unsigned int sharpness_level = 0; + + int min_sharpness, max_sharpness, mid_sharpness; + + switch (setup) { + + case HDR_L: + min_sharpness = sharpness_range.hdr_rgb_min; + max_sharpness = sharpness_range.hdr_rgb_max; + mid_sharpness = sharpness_range.hdr_rgb_mid; + break; + case HDR_NL: + /* currently no use case, use Non-linear SDR values for now */ + case SDR_NL: + min_sharpness = sharpness_range.sdr_yuv_min; + max_sharpness = sharpness_range.sdr_yuv_max; + mid_sharpness = sharpness_range.sdr_yuv_mid; + break; + case SDR_L: + default: + min_sharpness = sharpness_range.sdr_rgb_min; + max_sharpness = sharpness_range.sdr_rgb_max; + mid_sharpness = sharpness_range.sdr_rgb_mid; + break; + } + + int lower_half_step_size = (mid_sharpness - min_sharpness) / 5; + int upper_half_step_size = (max_sharpness - mid_sharpness) / 5; + + // lower half linear approximation + if (discrete_sharpness_level < 5) + sharpness_level = min_sharpness + (lower_half_step_size * discrete_sharpness_level); + // upper half linear approximation + else + sharpness_level = mid_sharpness + (upper_half_step_size * (discrete_sharpness_level - 5)); + + return sharpness_level; +} + +void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, + struct adaptive_sharpness sharpness) +{ + uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst; + struct spl_fixed31_32 sharp_base, sharp_calc, sharp_level; + int j; + int size_1dlut; + int sharp_calc_int; + uint32_t filter_pregen_store[ISHARP_LUT_TABLE_SIZE]; + + /* Custom sharpnessX1000 value */ + unsigned int sharpnessX1000 = spl_calculate_sharpness_level(sharpness.sharpness_level, + setup, sharpness.sharpness_range); + sharp_level = spl_fixpt_from_fraction(sharpnessX1000, 1000); + + /* + * Check if pregen 1dlut table is already precalculated + * If numer/denom is different, then recalculate + */ + if ((filter_isharp_1D_lut_pregen[setup].sharpness_numer == sharpnessX1000) && + (filter_isharp_1D_lut_pregen[setup].sharpness_denom == 1000)) + return; + + + /* + * Calculate LUT_128_gained with this equation: + * + * LUT_128_gained[i] = (uint8)(0.5 + min(255,(double)(LUT_128[i])*sharpLevel/iGain)) + * where LUT_128[i] is contents of 3p0x isharp 1dlut + * where sharpLevel is desired sharpness level + * where iGain is base sharpness level 3.0 + * where LUT_128_gained[i] is adjusted 1dlut value based on desired sharpness level + */ + byte_ptr_1dlut_src = (uint8_t *)filter_isharp_1D_lut_3p0x; + byte_ptr_1dlut_dst = (uint8_t *)filter_pregen_store; + size_1dlut = sizeof(filter_isharp_1D_lut_3p0x); + memset(byte_ptr_1dlut_dst, 0, size_1dlut); + for (j = 0; j < size_1dlut; j++) { + sharp_base = spl_fixpt_from_int((int)*byte_ptr_1dlut_src); + sharp_calc = spl_fixpt_mul(sharp_base, sharp_level); + sharp_calc = spl_fixpt_div(sharp_calc, spl_fixpt_from_int(3)); + sharp_calc = spl_fixpt_min(spl_fixpt_from_int(255), sharp_calc); + sharp_calc = spl_fixpt_add(sharp_calc, spl_fixpt_from_fraction(1, 2)); + sharp_calc_int = spl_fixpt_floor(sharp_calc); + /* Clamp it at 0x7F so it doesn't wrap */ + if (sharp_calc_int > 127) + sharp_calc_int = 127; + *byte_ptr_1dlut_dst = (uint8_t)sharp_calc_int; + + byte_ptr_1dlut_src++; + byte_ptr_1dlut_dst++; + } + + /* Update 1dlut table and sharpness level */ + memcpy((void *)filter_isharp_1D_lut_pregen[setup].value, (void *)filter_pregen_store, size_1dlut); + filter_isharp_1D_lut_pregen[setup].sharpness_numer = sharpnessX1000; + filter_isharp_1D_lut_pregen[setup].sharpness_denom = 1000; +} + +uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum system_setup setup) +{ + return filter_isharp_1D_lut_pregen[setup].value; +} + +void spl_init_blur_scale_coeffs(void) +{ + convert_filter_s1_10_to_s1_12(filter_isharp_bs_3tap_64p, + filter_isharp_bs_3tap_64p_s1_12, 3); + convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_64p, + filter_isharp_bs_4tap_64p_s1_12, 4); + convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_in_6_64p, + filter_isharp_bs_4tap_in_6_64p_s1_12, 6); +} + +uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) +{ + if (taps == 3) + return spl_get_filter_isharp_bs_3tap_64p(); + else if (taps == 4) + return spl_get_filter_isharp_bs_4tap_64p(); + else if (taps == 6) + return spl_get_filter_isharp_bs_4tap_in_6_64p(); + else { + /* should never happen, bug */ + SPL_BREAK_TO_DEBUGGER(); + return NULL; + } +} + +void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data) +{ + dscl_prog_data->filter_blur_scale_h = + spl_dscl_get_blur_scale_coeffs_64p(data->taps.h_taps); + + dscl_prog_data->filter_blur_scale_v = + spl_dscl_get_blur_scale_coeffs_64p(data->taps.v_taps); +} + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h index 1aaf4c50c1bc..fe0b12571f2c 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -7,11 +7,44 @@ #include "dc_spl_types.h" +#define ISHARP_LUT_TABLE_SIZE 32 const uint32_t *spl_get_filter_isharp_1D_lut_0(void); const uint32_t *spl_get_filter_isharp_1D_lut_0p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p0x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void); -const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); +const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void); +uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void); +uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); +uint16_t *spl_get_filter_isharp_bs_3tap_64p(void); const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void); +uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps); + +struct scale_ratio_to_sharpness_level_lookup { + unsigned int ratio_numer; + unsigned int ratio_denom; + unsigned int sharpness_numer; + unsigned int sharpness_denom; +}; + +struct isharp_1D_lut_pregen { + unsigned int sharpness_numer; + unsigned int sharpness_denom; + uint32_t value[ISHARP_LUT_TABLE_SIZE]; +}; + +enum system_setup { + SDR_NL = 0, + SDR_L, + HDR_NL, + HDR_L, + NUM_SHARPNESS_SETUPS +}; + +void spl_init_blur_scale_coeffs(void); +void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data); + +void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, struct adaptive_sharpness sharpness); +uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum system_setup setup); #endif /* __DC_SPL_ISHARP_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c new file mode 100644 index 000000000000..09bf82f7d468 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c @@ -0,0 +1,1726 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "spl_debug.h" +#include "dc_spl_filters.h" +#include "dc_spl_scl_filters.h" +#include "dc_spl_scl_easf_filters.h" + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 3t_64p_LanczosEd_p_0.3_p_10qb_ +// <num_taps> 3 +// <num_phases> 64 +// <scale_ratio> input/output = 0.300000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_30[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F6, 0x0206, 0x0004, + 0x01EC, 0x020B, 0x0009, + 0x01E2, 0x0211, 0x000D, + 0x01D8, 0x0216, 0x0012, + 0x01CE, 0x021C, 0x0016, + 0x01C4, 0x0221, 0x001B, + 0x01BA, 0x0226, 0x0020, + 0x01B0, 0x022A, 0x0026, + 0x01A6, 0x022F, 0x002B, + 0x019C, 0x0233, 0x0031, + 0x0192, 0x0238, 0x0036, + 0x0188, 0x023C, 0x003C, + 0x017E, 0x0240, 0x0042, + 0x0174, 0x0244, 0x0048, + 0x016A, 0x0248, 0x004E, + 0x0161, 0x024A, 0x0055, + 0x0157, 0x024E, 0x005B, + 0x014D, 0x0251, 0x0062, + 0x0144, 0x0253, 0x0069, + 0x013A, 0x0256, 0x0070, + 0x0131, 0x0258, 0x0077, + 0x0127, 0x025B, 0x007E, + 0x011E, 0x025C, 0x0086, + 0x0115, 0x025E, 0x008D, + 0x010B, 0x0260, 0x0095, + 0x0102, 0x0262, 0x009C, + 0x00F9, 0x0263, 0x00A4, + 0x00F0, 0x0264, 0x00AC, + 0x00E7, 0x0265, 0x00B4, + 0x00DF, 0x0264, 0x00BD, + 0x00D6, 0x0265, 0x00C5, + 0x00CD, 0x0266, 0x00CD, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 3t_64p_LanczosEd_p_0.4_p_10qb_ +// <num_taps> 3 +// <num_phases> 64 +// <scale_ratio> input/output = 0.400000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_40[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F6, 0x0206, 0x0004, + 0x01EB, 0x020E, 0x0007, + 0x01E1, 0x0214, 0x000B, + 0x01D7, 0x021A, 0x000F, + 0x01CD, 0x0220, 0x0013, + 0x01C2, 0x0226, 0x0018, + 0x01B8, 0x022C, 0x001C, + 0x01AE, 0x0231, 0x0021, + 0x01A3, 0x0237, 0x0026, + 0x0199, 0x023C, 0x002B, + 0x018F, 0x0240, 0x0031, + 0x0185, 0x0245, 0x0036, + 0x017A, 0x024A, 0x003C, + 0x0170, 0x024F, 0x0041, + 0x0166, 0x0253, 0x0047, + 0x015C, 0x0257, 0x004D, + 0x0152, 0x025A, 0x0054, + 0x0148, 0x025E, 0x005A, + 0x013E, 0x0261, 0x0061, + 0x0134, 0x0264, 0x0068, + 0x012B, 0x0266, 0x006F, + 0x0121, 0x0269, 0x0076, + 0x0117, 0x026C, 0x007D, + 0x010E, 0x026E, 0x0084, + 0x0104, 0x0270, 0x008C, + 0x00FB, 0x0271, 0x0094, + 0x00F2, 0x0272, 0x009C, + 0x00E9, 0x0273, 0x00A4, + 0x00E0, 0x0274, 0x00AC, + 0x00D7, 0x0275, 0x00B4, + 0x00CE, 0x0275, 0x00BD, + 0x00C5, 0x0276, 0x00C5, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 3t_64p_LanczosEd_p_0.5_p_10qb_ +// <num_taps> 3 +// <num_phases> 64 +// <scale_ratio> input/output = 0.500000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_50[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F5, 0x0209, 0x0002, + 0x01EA, 0x0211, 0x0005, + 0x01DF, 0x021A, 0x0007, + 0x01D4, 0x0222, 0x000A, + 0x01C9, 0x022A, 0x000D, + 0x01BE, 0x0232, 0x0010, + 0x01B3, 0x0239, 0x0014, + 0x01A8, 0x0241, 0x0017, + 0x019D, 0x0248, 0x001B, + 0x0192, 0x024F, 0x001F, + 0x0187, 0x0255, 0x0024, + 0x017C, 0x025C, 0x0028, + 0x0171, 0x0262, 0x002D, + 0x0166, 0x0268, 0x0032, + 0x015B, 0x026E, 0x0037, + 0x0150, 0x0273, 0x003D, + 0x0146, 0x0278, 0x0042, + 0x013B, 0x027D, 0x0048, + 0x0130, 0x0282, 0x004E, + 0x0126, 0x0286, 0x0054, + 0x011B, 0x028A, 0x005B, + 0x0111, 0x028D, 0x0062, + 0x0107, 0x0290, 0x0069, + 0x00FD, 0x0293, 0x0070, + 0x00F3, 0x0296, 0x0077, + 0x00E9, 0x0298, 0x007F, + 0x00DF, 0x029A, 0x0087, + 0x00D5, 0x029C, 0x008F, + 0x00CC, 0x029D, 0x0097, + 0x00C3, 0x029E, 0x009F, + 0x00BA, 0x029E, 0x00A8, + 0x00B1, 0x029E, 0x00B1, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 3t_64p_LanczosEd_p_0.6_p_10qb_ +// <num_taps> 3 +// <num_phases> 64 +// <scale_ratio> input/output = 0.600000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_60[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F4, 0x020B, 0x0001, + 0x01E8, 0x0216, 0x0002, + 0x01DC, 0x0221, 0x0003, + 0x01D0, 0x022B, 0x0005, + 0x01C4, 0x0235, 0x0007, + 0x01B8, 0x0240, 0x0008, + 0x01AC, 0x0249, 0x000B, + 0x01A0, 0x0253, 0x000D, + 0x0194, 0x025C, 0x0010, + 0x0188, 0x0265, 0x0013, + 0x017C, 0x026E, 0x0016, + 0x0170, 0x0277, 0x0019, + 0x0164, 0x027F, 0x001D, + 0x0158, 0x0287, 0x0021, + 0x014C, 0x028F, 0x0025, + 0x0140, 0x0297, 0x0029, + 0x0135, 0x029D, 0x002E, + 0x0129, 0x02A4, 0x0033, + 0x011D, 0x02AB, 0x0038, + 0x0112, 0x02B0, 0x003E, + 0x0107, 0x02B5, 0x0044, + 0x00FC, 0x02BA, 0x004A, + 0x00F1, 0x02BF, 0x0050, + 0x00E6, 0x02C3, 0x0057, + 0x00DB, 0x02C7, 0x005E, + 0x00D1, 0x02CA, 0x0065, + 0x00C7, 0x02CC, 0x006D, + 0x00BD, 0x02CE, 0x0075, + 0x00B3, 0x02D0, 0x007D, + 0x00A9, 0x02D2, 0x0085, + 0x00A0, 0x02D2, 0x008E, + 0x0097, 0x02D2, 0x0097, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 3t_64p_LanczosEd_p_0.7_p_10qb_ +// <num_taps> 3 +// <num_phases> 64 +// <scale_ratio> input/output = 0.700000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_70[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F3, 0x020D, 0x0000, + 0x01E5, 0x021B, 0x0000, + 0x01D8, 0x0228, 0x0000, + 0x01CB, 0x0235, 0x0000, + 0x01BD, 0x0243, 0x0000, + 0x01B0, 0x024F, 0x0001, + 0x01A2, 0x025C, 0x0002, + 0x0195, 0x0268, 0x0003, + 0x0187, 0x0275, 0x0004, + 0x017A, 0x0280, 0x0006, + 0x016D, 0x028C, 0x0007, + 0x015F, 0x0298, 0x0009, + 0x0152, 0x02A2, 0x000C, + 0x0145, 0x02AD, 0x000E, + 0x0138, 0x02B7, 0x0011, + 0x012B, 0x02C0, 0x0015, + 0x011E, 0x02CA, 0x0018, + 0x0111, 0x02D3, 0x001C, + 0x0105, 0x02DB, 0x0020, + 0x00F8, 0x02E3, 0x0025, + 0x00EC, 0x02EA, 0x002A, + 0x00E0, 0x02F1, 0x002F, + 0x00D5, 0x02F6, 0x0035, + 0x00C9, 0x02FC, 0x003B, + 0x00BE, 0x0301, 0x0041, + 0x00B3, 0x0305, 0x0048, + 0x00A8, 0x0309, 0x004F, + 0x009E, 0x030C, 0x0056, + 0x0094, 0x030E, 0x005E, + 0x008A, 0x0310, 0x0066, + 0x0081, 0x0310, 0x006F, + 0x0077, 0x0312, 0x0077, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 3t_64p_LanczosEd_p_0.8_p_10qb_ +// <num_taps> 3 +// <num_phases> 64 +// <scale_ratio> input/output = 0.800000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_80[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F1, 0x0210, 0x0FFF, + 0x01E2, 0x0220, 0x0FFE, + 0x01D2, 0x0232, 0x0FFC, + 0x01C3, 0x0241, 0x0FFC, + 0x01B4, 0x0251, 0x0FFB, + 0x01A4, 0x0262, 0x0FFA, + 0x0195, 0x0271, 0x0FFA, + 0x0186, 0x0281, 0x0FF9, + 0x0176, 0x0291, 0x0FF9, + 0x0167, 0x02A0, 0x0FF9, + 0x0158, 0x02AE, 0x0FFA, + 0x0149, 0x02BD, 0x0FFA, + 0x013A, 0x02CB, 0x0FFB, + 0x012C, 0x02D7, 0x0FFD, + 0x011D, 0x02E5, 0x0FFE, + 0x010F, 0x02F1, 0x0000, + 0x0101, 0x02FD, 0x0002, + 0x00F3, 0x0308, 0x0005, + 0x00E5, 0x0313, 0x0008, + 0x00D8, 0x031D, 0x000B, + 0x00CB, 0x0326, 0x000F, + 0x00BE, 0x032F, 0x0013, + 0x00B2, 0x0337, 0x0017, + 0x00A6, 0x033E, 0x001C, + 0x009A, 0x0345, 0x0021, + 0x008F, 0x034A, 0x0027, + 0x0084, 0x034F, 0x002D, + 0x0079, 0x0353, 0x0034, + 0x006F, 0x0356, 0x003B, + 0x0065, 0x0358, 0x0043, + 0x005C, 0x0359, 0x004B, + 0x0053, 0x035A, 0x0053, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 3t_64p_LanczosEd_p_0.9_p_10qb_ +// <num_taps> 3 +// <num_phases> 64 +// <scale_ratio> input/output = 0.900000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_90[99] = { + 0x0200, 0x0200, 0x0000, + 0x01EE, 0x0214, 0x0FFE, + 0x01DC, 0x0228, 0x0FFC, + 0x01CA, 0x023C, 0x0FFA, + 0x01B9, 0x024F, 0x0FF8, + 0x01A7, 0x0262, 0x0FF7, + 0x0195, 0x0276, 0x0FF5, + 0x0183, 0x028A, 0x0FF3, + 0x0172, 0x029C, 0x0FF2, + 0x0160, 0x02AF, 0x0FF1, + 0x014F, 0x02C2, 0x0FEF, + 0x013E, 0x02D4, 0x0FEE, + 0x012D, 0x02E5, 0x0FEE, + 0x011C, 0x02F7, 0x0FED, + 0x010C, 0x0307, 0x0FED, + 0x00FB, 0x0318, 0x0FED, + 0x00EC, 0x0327, 0x0FED, + 0x00DC, 0x0336, 0x0FEE, + 0x00CD, 0x0344, 0x0FEF, + 0x00BE, 0x0352, 0x0FF0, + 0x00B0, 0x035E, 0x0FF2, + 0x00A2, 0x036A, 0x0FF4, + 0x0095, 0x0375, 0x0FF6, + 0x0088, 0x037F, 0x0FF9, + 0x007B, 0x0388, 0x0FFD, + 0x006F, 0x0391, 0x0000, + 0x0064, 0x0397, 0x0005, + 0x0059, 0x039D, 0x000A, + 0x004E, 0x03A3, 0x000F, + 0x0045, 0x03A6, 0x0015, + 0x003B, 0x03A9, 0x001C, + 0x0033, 0x03AA, 0x0023, + 0x002A, 0x03AC, 0x002A, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 3t_64p_LanczosEd_p_1_p_10qb_ +// <num_taps> 3 +// <num_phases> 64 +// <scale_ratio> input/output = 1.000000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_1_00[99] = { + 0x0200, 0x0200, 0x0000, + 0x01EB, 0x0217, 0x0FFE, + 0x01D5, 0x022F, 0x0FFC, + 0x01C0, 0x0247, 0x0FF9, + 0x01AB, 0x025E, 0x0FF7, + 0x0196, 0x0276, 0x0FF4, + 0x0181, 0x028D, 0x0FF2, + 0x016C, 0x02A5, 0x0FEF, + 0x0158, 0x02BB, 0x0FED, + 0x0144, 0x02D1, 0x0FEB, + 0x0130, 0x02E8, 0x0FE8, + 0x011C, 0x02FE, 0x0FE6, + 0x0109, 0x0313, 0x0FE4, + 0x00F6, 0x0328, 0x0FE2, + 0x00E4, 0x033C, 0x0FE0, + 0x00D2, 0x034F, 0x0FDF, + 0x00C0, 0x0363, 0x0FDD, + 0x00B0, 0x0374, 0x0FDC, + 0x009F, 0x0385, 0x0FDC, + 0x0090, 0x0395, 0x0FDB, + 0x0081, 0x03A4, 0x0FDB, + 0x0072, 0x03B3, 0x0FDB, + 0x0064, 0x03C0, 0x0FDC, + 0x0057, 0x03CC, 0x0FDD, + 0x004B, 0x03D6, 0x0FDF, + 0x003F, 0x03E0, 0x0FE1, + 0x0034, 0x03E8, 0x0FE4, + 0x002A, 0x03EF, 0x0FE7, + 0x0020, 0x03F5, 0x0FEB, + 0x0017, 0x03FA, 0x0FEF, + 0x000F, 0x03FD, 0x0FF4, + 0x0007, 0x03FF, 0x0FFA, + 0x0000, 0x0400, 0x0000, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 4t_64p_LanczosEd_p_0.3_p_10qb_ +// <num_taps> 4 +// <num_phases> 64 +// <scale_ratio> input/output = 0.300000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_30[132] = { + 0x0104, 0x01F8, 0x0104, 0x0000, + 0x00FE, 0x01F7, 0x010A, 0x0001, + 0x00F8, 0x01F6, 0x010F, 0x0003, + 0x00F2, 0x01F5, 0x0114, 0x0005, + 0x00EB, 0x01F4, 0x011B, 0x0006, + 0x00E5, 0x01F3, 0x0120, 0x0008, + 0x00DF, 0x01F2, 0x0125, 0x000A, + 0x00DA, 0x01F0, 0x012A, 0x000C, + 0x00D4, 0x01EE, 0x0130, 0x000E, + 0x00CE, 0x01ED, 0x0135, 0x0010, + 0x00C8, 0x01EB, 0x013A, 0x0013, + 0x00C2, 0x01E9, 0x0140, 0x0015, + 0x00BD, 0x01E7, 0x0145, 0x0017, + 0x00B7, 0x01E5, 0x014A, 0x001A, + 0x00B1, 0x01E2, 0x0151, 0x001C, + 0x00AC, 0x01E0, 0x0155, 0x001F, + 0x00A7, 0x01DD, 0x015A, 0x0022, + 0x00A1, 0x01DB, 0x015F, 0x0025, + 0x009C, 0x01D8, 0x0165, 0x0027, + 0x0097, 0x01D5, 0x016A, 0x002A, + 0x0092, 0x01D2, 0x016E, 0x002E, + 0x008C, 0x01CF, 0x0174, 0x0031, + 0x0087, 0x01CC, 0x0179, 0x0034, + 0x0083, 0x01C9, 0x017D, 0x0037, + 0x007E, 0x01C5, 0x0182, 0x003B, + 0x0079, 0x01C2, 0x0187, 0x003E, + 0x0074, 0x01BE, 0x018C, 0x0042, + 0x0070, 0x01BA, 0x0190, 0x0046, + 0x006B, 0x01B7, 0x0195, 0x0049, + 0x0066, 0x01B3, 0x019A, 0x004D, + 0x0062, 0x01AF, 0x019E, 0x0051, + 0x005E, 0x01AB, 0x01A2, 0x0055, + 0x005A, 0x01A6, 0x01A6, 0x005A, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 4t_64p_LanczosEd_p_0.4_p_10qb_ +// <num_taps> 4 +// <num_phases> 64 +// <scale_ratio> input/output = 0.400000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_40[132] = { + 0x00FB, 0x0209, 0x00FC, 0x0000, + 0x00F5, 0x0209, 0x0101, 0x0001, + 0x00EE, 0x0208, 0x0108, 0x0002, + 0x00E8, 0x0207, 0x010E, 0x0003, + 0x00E2, 0x0206, 0x0114, 0x0004, + 0x00DB, 0x0205, 0x011A, 0x0006, + 0x00D5, 0x0204, 0x0120, 0x0007, + 0x00CF, 0x0203, 0x0125, 0x0009, + 0x00C9, 0x0201, 0x012C, 0x000A, + 0x00C3, 0x01FF, 0x0132, 0x000C, + 0x00BD, 0x01FD, 0x0138, 0x000E, + 0x00B7, 0x01FB, 0x013E, 0x0010, + 0x00B1, 0x01F9, 0x0144, 0x0012, + 0x00AC, 0x01F7, 0x0149, 0x0014, + 0x00A6, 0x01F4, 0x0150, 0x0016, + 0x00A0, 0x01F2, 0x0156, 0x0018, + 0x009B, 0x01EF, 0x015C, 0x001A, + 0x0095, 0x01EC, 0x0162, 0x001D, + 0x0090, 0x01E9, 0x0168, 0x001F, + 0x008B, 0x01E6, 0x016D, 0x0022, + 0x0085, 0x01E3, 0x0173, 0x0025, + 0x0080, 0x01DF, 0x0179, 0x0028, + 0x007B, 0x01DC, 0x017E, 0x002B, + 0x0076, 0x01D8, 0x0184, 0x002E, + 0x0071, 0x01D4, 0x018A, 0x0031, + 0x006D, 0x01D1, 0x018E, 0x0034, + 0x0068, 0x01CD, 0x0193, 0x0038, + 0x0063, 0x01C8, 0x019A, 0x003B, + 0x005F, 0x01C4, 0x019E, 0x003F, + 0x005B, 0x01C0, 0x01A3, 0x0042, + 0x0056, 0x01BB, 0x01A9, 0x0046, + 0x0052, 0x01B7, 0x01AD, 0x004A, + 0x004E, 0x01B2, 0x01B2, 0x004E, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 4t_64p_LanczosEd_p_0.5_p_10qb_ +// <num_taps> 4 +// <num_phases> 64 +// <scale_ratio> input/output = 0.500000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_50[132] = { + 0x00E5, 0x0236, 0x00E5, 0x0000, + 0x00DE, 0x0235, 0x00ED, 0x0000, + 0x00D7, 0x0235, 0x00F4, 0x0000, + 0x00D0, 0x0235, 0x00FB, 0x0000, + 0x00C9, 0x0234, 0x0102, 0x0001, + 0x00C2, 0x0233, 0x010A, 0x0001, + 0x00BC, 0x0232, 0x0111, 0x0001, + 0x00B5, 0x0230, 0x0119, 0x0002, + 0x00AE, 0x022F, 0x0121, 0x0002, + 0x00A8, 0x022D, 0x0128, 0x0003, + 0x00A2, 0x022B, 0x012F, 0x0004, + 0x009B, 0x0229, 0x0137, 0x0005, + 0x0095, 0x0226, 0x013F, 0x0006, + 0x008F, 0x0224, 0x0146, 0x0007, + 0x0089, 0x0221, 0x014E, 0x0008, + 0x0083, 0x021E, 0x0155, 0x000A, + 0x007E, 0x021B, 0x015C, 0x000B, + 0x0078, 0x0217, 0x0164, 0x000D, + 0x0072, 0x0213, 0x016D, 0x000E, + 0x006D, 0x0210, 0x0173, 0x0010, + 0x0068, 0x020C, 0x017A, 0x0012, + 0x0063, 0x0207, 0x0182, 0x0014, + 0x005E, 0x0203, 0x0189, 0x0016, + 0x0059, 0x01FE, 0x0191, 0x0018, + 0x0054, 0x01F9, 0x0198, 0x001B, + 0x0050, 0x01F4, 0x019F, 0x001D, + 0x004B, 0x01EF, 0x01A6, 0x0020, + 0x0047, 0x01EA, 0x01AC, 0x0023, + 0x0043, 0x01E4, 0x01B3, 0x0026, + 0x003F, 0x01DF, 0x01B9, 0x0029, + 0x003B, 0x01D9, 0x01C0, 0x002C, + 0x0037, 0x01D3, 0x01C6, 0x0030, + 0x0033, 0x01CD, 0x01CD, 0x0033, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 4t_64p_LanczosEd_p_0.6_p_10qb_ +// <num_taps> 4 +// <num_phases> 64 +// <scale_ratio> input/output = 0.600000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_60[132] = { + 0x00C8, 0x026F, 0x00C9, 0x0000, + 0x00C0, 0x0270, 0x00D1, 0x0FFF, + 0x00B8, 0x0270, 0x00D9, 0x0FFF, + 0x00B1, 0x0270, 0x00E1, 0x0FFE, + 0x00A9, 0x026F, 0x00EB, 0x0FFD, + 0x00A2, 0x026E, 0x00F3, 0x0FFD, + 0x009A, 0x026D, 0x00FD, 0x0FFC, + 0x0093, 0x026C, 0x0105, 0x0FFC, + 0x008C, 0x026A, 0x010F, 0x0FFB, + 0x0085, 0x0268, 0x0118, 0x0FFB, + 0x007E, 0x0265, 0x0122, 0x0FFB, + 0x0078, 0x0263, 0x012A, 0x0FFB, + 0x0071, 0x0260, 0x0134, 0x0FFB, + 0x006B, 0x025C, 0x013E, 0x0FFB, + 0x0065, 0x0259, 0x0147, 0x0FFB, + 0x005F, 0x0255, 0x0151, 0x0FFB, + 0x0059, 0x0251, 0x015A, 0x0FFC, + 0x0054, 0x024D, 0x0163, 0x0FFC, + 0x004E, 0x0248, 0x016D, 0x0FFD, + 0x0049, 0x0243, 0x0176, 0x0FFE, + 0x0044, 0x023E, 0x017F, 0x0FFF, + 0x003F, 0x0238, 0x0189, 0x0000, + 0x003A, 0x0232, 0x0193, 0x0001, + 0x0036, 0x022C, 0x019C, 0x0002, + 0x0031, 0x0226, 0x01A5, 0x0004, + 0x002D, 0x021F, 0x01AF, 0x0005, + 0x0029, 0x0218, 0x01B8, 0x0007, + 0x0025, 0x0211, 0x01C1, 0x0009, + 0x0022, 0x020A, 0x01C9, 0x000B, + 0x001E, 0x0203, 0x01D2, 0x000D, + 0x001B, 0x01FB, 0x01DA, 0x0010, + 0x0018, 0x01F3, 0x01E3, 0x0012, + 0x0015, 0x01EB, 0x01EB, 0x0015, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 4t_64p_LanczosEd_p_0.7_p_10qb_ +// <num_taps> 4 +// <num_phases> 64 +// <scale_ratio> input/output = 0.700000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_70[132] = { + 0x00A3, 0x02B9, 0x00A4, 0x0000, + 0x009A, 0x02BA, 0x00AD, 0x0FFF, + 0x0092, 0x02BA, 0x00B6, 0x0FFE, + 0x0089, 0x02BA, 0x00C1, 0x0FFC, + 0x0081, 0x02B9, 0x00CB, 0x0FFB, + 0x0079, 0x02B8, 0x00D5, 0x0FFA, + 0x0071, 0x02B7, 0x00DF, 0x0FF9, + 0x0069, 0x02B5, 0x00EA, 0x0FF8, + 0x0062, 0x02B3, 0x00F4, 0x0FF7, + 0x005B, 0x02B0, 0x00FF, 0x0FF6, + 0x0054, 0x02AD, 0x010B, 0x0FF4, + 0x004D, 0x02A9, 0x0117, 0x0FF3, + 0x0046, 0x02A5, 0x0123, 0x0FF2, + 0x0040, 0x02A1, 0x012D, 0x0FF2, + 0x003A, 0x029C, 0x0139, 0x0FF1, + 0x0034, 0x0297, 0x0145, 0x0FF0, + 0x002F, 0x0292, 0x0150, 0x0FEF, + 0x0029, 0x028C, 0x015C, 0x0FEF, + 0x0024, 0x0285, 0x0169, 0x0FEE, + 0x001F, 0x027F, 0x0174, 0x0FEE, + 0x001B, 0x0278, 0x017F, 0x0FEE, + 0x0016, 0x0270, 0x018D, 0x0FED, + 0x0012, 0x0268, 0x0199, 0x0FED, + 0x000E, 0x0260, 0x01A4, 0x0FEE, + 0x000B, 0x0258, 0x01AF, 0x0FEE, + 0x0007, 0x024F, 0x01BC, 0x0FEE, + 0x0004, 0x0246, 0x01C7, 0x0FEF, + 0x0001, 0x023D, 0x01D3, 0x0FEF, + 0x0FFE, 0x0233, 0x01DF, 0x0FF0, + 0x0FFC, 0x0229, 0x01EA, 0x0FF1, + 0x0FFA, 0x021F, 0x01F4, 0x0FF3, + 0x0FF8, 0x0215, 0x01FF, 0x0FF4, + 0x0FF6, 0x020A, 0x020A, 0x0FF6, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 4t_64p_LanczosEd_p_0.8_p_10qb_ +// <num_taps> 4 +// <num_phases> 64 +// <scale_ratio> input/output = 0.800000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_80[132] = { + 0x0075, 0x0315, 0x0076, 0x0000, + 0x006C, 0x0316, 0x007F, 0x0FFF, + 0x0062, 0x0316, 0x008A, 0x0FFE, + 0x0059, 0x0315, 0x0096, 0x0FFC, + 0x0050, 0x0314, 0x00A1, 0x0FFB, + 0x0048, 0x0312, 0x00AD, 0x0FF9, + 0x0040, 0x0310, 0x00B8, 0x0FF8, + 0x0038, 0x030D, 0x00C5, 0x0FF6, + 0x0030, 0x030A, 0x00D1, 0x0FF5, + 0x0029, 0x0306, 0x00DE, 0x0FF3, + 0x0022, 0x0301, 0x00EB, 0x0FF2, + 0x001C, 0x02FC, 0x00F8, 0x0FF0, + 0x0015, 0x02F7, 0x0106, 0x0FEE, + 0x0010, 0x02F1, 0x0112, 0x0FED, + 0x000A, 0x02EA, 0x0121, 0x0FEB, + 0x0005, 0x02E3, 0x012F, 0x0FE9, + 0x0000, 0x02DB, 0x013D, 0x0FE8, + 0x0FFB, 0x02D3, 0x014C, 0x0FE6, + 0x0FF7, 0x02CA, 0x015A, 0x0FE5, + 0x0FF3, 0x02C1, 0x0169, 0x0FE3, + 0x0FF0, 0x02B7, 0x0177, 0x0FE2, + 0x0FEC, 0x02AD, 0x0186, 0x0FE1, + 0x0FE9, 0x02A2, 0x0196, 0x0FDF, + 0x0FE7, 0x0297, 0x01A4, 0x0FDE, + 0x0FE4, 0x028C, 0x01B3, 0x0FDD, + 0x0FE2, 0x0280, 0x01C2, 0x0FDC, + 0x0FE0, 0x0274, 0x01D0, 0x0FDC, + 0x0FDF, 0x0268, 0x01DE, 0x0FDB, + 0x0FDD, 0x025B, 0x01EE, 0x0FDA, + 0x0FDC, 0x024E, 0x01FC, 0x0FDA, + 0x0FDB, 0x0241, 0x020A, 0x0FDA, + 0x0FDB, 0x0233, 0x0218, 0x0FDA, + 0x0FDA, 0x0226, 0x0226, 0x0FDA, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 4t_64p_LanczosEd_p_0.9_p_10qb_ +// <num_taps> 4 +// <num_phases> 64 +// <scale_ratio> input/output = 0.900000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_90[132] = { + 0x003F, 0x0383, 0x003E, 0x0000, + 0x0034, 0x0383, 0x004A, 0x0FFF, + 0x002B, 0x0383, 0x0054, 0x0FFE, + 0x0021, 0x0381, 0x0061, 0x0FFD, + 0x0019, 0x037F, 0x006C, 0x0FFC, + 0x0010, 0x037C, 0x0079, 0x0FFB, + 0x0008, 0x0378, 0x0086, 0x0FFA, + 0x0001, 0x0374, 0x0093, 0x0FF8, + 0x0FFA, 0x036E, 0x00A1, 0x0FF7, + 0x0FF3, 0x0368, 0x00B0, 0x0FF5, + 0x0FED, 0x0361, 0x00BF, 0x0FF3, + 0x0FE8, 0x035A, 0x00CD, 0x0FF1, + 0x0FE2, 0x0352, 0x00DC, 0x0FF0, + 0x0FDE, 0x0349, 0x00EB, 0x0FEE, + 0x0FD9, 0x033F, 0x00FC, 0x0FEC, + 0x0FD5, 0x0335, 0x010D, 0x0FE9, + 0x0FD2, 0x032A, 0x011D, 0x0FE7, + 0x0FCF, 0x031E, 0x012E, 0x0FE5, + 0x0FCC, 0x0312, 0x013F, 0x0FE3, + 0x0FCA, 0x0305, 0x0150, 0x0FE1, + 0x0FC8, 0x02F8, 0x0162, 0x0FDE, + 0x0FC6, 0x02EA, 0x0174, 0x0FDC, + 0x0FC5, 0x02DC, 0x0185, 0x0FDA, + 0x0FC4, 0x02CD, 0x0197, 0x0FD8, + 0x0FC3, 0x02BE, 0x01AA, 0x0FD5, + 0x0FC3, 0x02AF, 0x01BB, 0x0FD3, + 0x0FC3, 0x029F, 0x01CD, 0x0FD1, + 0x0FC3, 0x028E, 0x01E0, 0x0FCF, + 0x0FC3, 0x027E, 0x01F2, 0x0FCD, + 0x0FC4, 0x026D, 0x0203, 0x0FCC, + 0x0FC5, 0x025C, 0x0215, 0x0FCA, + 0x0FC6, 0x024B, 0x0227, 0x0FC8, + 0x0FC7, 0x0239, 0x0239, 0x0FC7, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 03-Apr-2024 +// <coeffDescrip> 4t_64p_LanczosEd_p_1_p_10qb_ +// <num_taps> 4 +// <num_phases> 64 +// <scale_ratio> input/output = 1.000000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_1_00[132] = { + 0x0000, 0x0400, 0x0000, 0x0000, + 0x0FF6, 0x03FF, 0x000B, 0x0000, + 0x0FED, 0x03FE, 0x0015, 0x0000, + 0x0FE4, 0x03FB, 0x0022, 0x0FFF, + 0x0FDC, 0x03F7, 0x002E, 0x0FFF, + 0x0FD5, 0x03F2, 0x003B, 0x0FFE, + 0x0FCE, 0x03EC, 0x0048, 0x0FFE, + 0x0FC8, 0x03E5, 0x0056, 0x0FFD, + 0x0FC3, 0x03DC, 0x0065, 0x0FFC, + 0x0FBE, 0x03D3, 0x0075, 0x0FFA, + 0x0FB9, 0x03C9, 0x0085, 0x0FF9, + 0x0FB6, 0x03BE, 0x0094, 0x0FF8, + 0x0FB2, 0x03B2, 0x00A6, 0x0FF6, + 0x0FB0, 0x03A5, 0x00B7, 0x0FF4, + 0x0FAD, 0x0397, 0x00CA, 0x0FF2, + 0x0FAB, 0x0389, 0x00DC, 0x0FF0, + 0x0FAA, 0x0379, 0x00EF, 0x0FEE, + 0x0FA9, 0x0369, 0x0102, 0x0FEC, + 0x0FA9, 0x0359, 0x0115, 0x0FE9, + 0x0FA9, 0x0348, 0x0129, 0x0FE6, + 0x0FA9, 0x0336, 0x013D, 0x0FE4, + 0x0FA9, 0x0323, 0x0153, 0x0FE1, + 0x0FAA, 0x0310, 0x0168, 0x0FDE, + 0x0FAC, 0x02FD, 0x017C, 0x0FDB, + 0x0FAD, 0x02E9, 0x0192, 0x0FD8, + 0x0FAF, 0x02D5, 0x01A7, 0x0FD5, + 0x0FB1, 0x02C0, 0x01BD, 0x0FD2, + 0x0FB3, 0x02AC, 0x01D2, 0x0FCF, + 0x0FB5, 0x0296, 0x01E9, 0x0FCC, + 0x0FB8, 0x0281, 0x01FE, 0x0FC9, + 0x0FBA, 0x026C, 0x0214, 0x0FC6, + 0x0FBD, 0x0256, 0x022A, 0x0FC3, + 0x0FC0, 0x0240, 0x0240, 0x0FC0, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 02-Apr-2024 +// <coeffDescrip> 6t_64p_LanczosEd_p_0.3_p_10qb_ +// <num_taps> 6 +// <num_phases> 64 +// <scale_ratio> input/output = 0.300000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_30[198] = { + 0x004B, 0x0100, 0x0169, 0x0101, 0x004B, 0x0000, + 0x0049, 0x00FD, 0x0169, 0x0103, 0x004E, 0x0000, + 0x0047, 0x00FA, 0x0169, 0x0106, 0x0050, 0x0000, + 0x0045, 0x00F7, 0x0168, 0x0109, 0x0052, 0x0001, + 0x0043, 0x00F5, 0x0168, 0x010B, 0x0054, 0x0001, + 0x0040, 0x00F2, 0x0168, 0x010E, 0x0057, 0x0001, + 0x003E, 0x00EF, 0x0168, 0x0110, 0x0059, 0x0002, + 0x003C, 0x00EC, 0x0167, 0x0113, 0x005C, 0x0002, + 0x003A, 0x00E9, 0x0167, 0x0116, 0x005E, 0x0002, + 0x0038, 0x00E6, 0x0166, 0x0118, 0x0061, 0x0003, + 0x0036, 0x00E3, 0x0165, 0x011C, 0x0063, 0x0003, + 0x0034, 0x00E0, 0x0165, 0x011D, 0x0066, 0x0004, + 0x0033, 0x00DD, 0x0164, 0x0120, 0x0068, 0x0004, + 0x0031, 0x00DA, 0x0163, 0x0122, 0x006B, 0x0005, + 0x002F, 0x00D7, 0x0163, 0x0125, 0x006D, 0x0005, + 0x002D, 0x00D3, 0x0162, 0x0128, 0x0070, 0x0006, + 0x002B, 0x00D0, 0x0161, 0x012A, 0x0073, 0x0007, + 0x002A, 0x00CD, 0x0160, 0x012D, 0x0075, 0x0007, + 0x0028, 0x00CA, 0x015F, 0x012F, 0x0078, 0x0008, + 0x0026, 0x00C7, 0x015E, 0x0131, 0x007B, 0x0009, + 0x0025, 0x00C4, 0x015D, 0x0133, 0x007E, 0x0009, + 0x0023, 0x00C1, 0x015C, 0x0136, 0x0080, 0x000A, + 0x0022, 0x00BE, 0x015A, 0x0138, 0x0083, 0x000B, + 0x0020, 0x00BB, 0x0159, 0x013A, 0x0086, 0x000C, + 0x001F, 0x00B8, 0x0158, 0x013B, 0x0089, 0x000D, + 0x001E, 0x00B5, 0x0156, 0x013E, 0x008C, 0x000D, + 0x001C, 0x00B2, 0x0155, 0x0140, 0x008F, 0x000E, + 0x001B, 0x00AF, 0x0153, 0x0143, 0x0091, 0x000F, + 0x0019, 0x00AC, 0x0152, 0x0145, 0x0094, 0x0010, + 0x0018, 0x00A9, 0x0150, 0x0147, 0x0097, 0x0011, + 0x0017, 0x00A6, 0x014F, 0x0148, 0x009A, 0x0012, + 0x0016, 0x00A3, 0x014D, 0x0149, 0x009D, 0x0014, + 0x0015, 0x00A0, 0x014B, 0x014B, 0x00A0, 0x0015, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 02-Apr-2024 +// <coeffDescrip> 6t_64p_LanczosEd_p_0.4_p_10qb_ +// <num_taps> 6 +// <num_phases> 64 +// <scale_ratio> input/output = 0.400000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_40[198] = { + 0x0028, 0x0106, 0x01A3, 0x0107, 0x0028, 0x0000, + 0x0026, 0x0102, 0x01A3, 0x010A, 0x002B, 0x0000, + 0x0024, 0x00FE, 0x01A3, 0x010F, 0x002D, 0x0FFF, + 0x0022, 0x00FA, 0x01A3, 0x0113, 0x002F, 0x0FFF, + 0x0021, 0x00F6, 0x01A3, 0x0116, 0x0031, 0x0FFF, + 0x001F, 0x00F2, 0x01A2, 0x011B, 0x0034, 0x0FFE, + 0x001D, 0x00EE, 0x01A2, 0x011F, 0x0036, 0x0FFE, + 0x001B, 0x00EA, 0x01A1, 0x0123, 0x0039, 0x0FFE, + 0x0019, 0x00E6, 0x01A1, 0x0127, 0x003B, 0x0FFE, + 0x0018, 0x00E2, 0x01A0, 0x012A, 0x003E, 0x0FFE, + 0x0016, 0x00DE, 0x01A0, 0x012E, 0x0041, 0x0FFD, + 0x0015, 0x00DA, 0x019F, 0x0132, 0x0043, 0x0FFD, + 0x0013, 0x00D6, 0x019E, 0x0136, 0x0046, 0x0FFD, + 0x0012, 0x00D2, 0x019D, 0x0139, 0x0049, 0x0FFD, + 0x0010, 0x00CE, 0x019C, 0x013D, 0x004C, 0x0FFD, + 0x000F, 0x00CA, 0x019A, 0x0141, 0x004F, 0x0FFD, + 0x000E, 0x00C6, 0x0199, 0x0144, 0x0052, 0x0FFD, + 0x000D, 0x00C2, 0x0197, 0x0148, 0x0055, 0x0FFD, + 0x000B, 0x00BE, 0x0196, 0x014C, 0x0058, 0x0FFD, + 0x000A, 0x00BA, 0x0195, 0x014F, 0x005B, 0x0FFD, + 0x0009, 0x00B6, 0x0193, 0x0153, 0x005E, 0x0FFD, + 0x0008, 0x00B2, 0x0191, 0x0157, 0x0061, 0x0FFD, + 0x0007, 0x00AE, 0x0190, 0x015A, 0x0064, 0x0FFD, + 0x0006, 0x00AA, 0x018E, 0x015D, 0x0068, 0x0FFD, + 0x0005, 0x00A6, 0x018C, 0x0161, 0x006B, 0x0FFD, + 0x0005, 0x00A2, 0x0189, 0x0164, 0x006F, 0x0FFD, + 0x0004, 0x009E, 0x0187, 0x0167, 0x0072, 0x0FFE, + 0x0003, 0x009A, 0x0185, 0x016B, 0x0075, 0x0FFE, + 0x0002, 0x0096, 0x0183, 0x016E, 0x0079, 0x0FFE, + 0x0002, 0x0093, 0x0180, 0x016F, 0x007D, 0x0FFF, + 0x0001, 0x008F, 0x017E, 0x0173, 0x0080, 0x0FFF, + 0x0001, 0x008B, 0x017B, 0x0175, 0x0084, 0x0000, + 0x0000, 0x0087, 0x0179, 0x0179, 0x0087, 0x0000, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 02-Apr-2024 +// <coeffDescrip> 6t_64p_LanczosEd_p_0.5_p_10qb_ +// <num_taps> 6 +// <num_phases> 64 +// <scale_ratio> input/output = 0.500000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_50[198] = { + 0x0000, 0x0107, 0x01F3, 0x0106, 0x0000, 0x0000, + 0x0FFE, 0x0101, 0x01F3, 0x010D, 0x0002, 0x0FFF, + 0x0FFD, 0x00FB, 0x01F3, 0x0113, 0x0003, 0x0FFF, + 0x0FFC, 0x00F6, 0x01F3, 0x0118, 0x0005, 0x0FFE, + 0x0FFA, 0x00F0, 0x01F3, 0x011E, 0x0007, 0x0FFE, + 0x0FF9, 0x00EB, 0x01F2, 0x0124, 0x0009, 0x0FFD, + 0x0FF8, 0x00E5, 0x01F2, 0x0129, 0x000B, 0x0FFD, + 0x0FF7, 0x00E0, 0x01F1, 0x012F, 0x000D, 0x0FFC, + 0x0FF6, 0x00DA, 0x01F0, 0x0135, 0x0010, 0x0FFB, + 0x0FF5, 0x00D4, 0x01EF, 0x013B, 0x0012, 0x0FFB, + 0x0FF4, 0x00CF, 0x01EE, 0x0141, 0x0014, 0x0FFA, + 0x0FF3, 0x00C9, 0x01ED, 0x0147, 0x0017, 0x0FF9, + 0x0FF2, 0x00C4, 0x01EB, 0x014C, 0x001A, 0x0FF9, + 0x0FF1, 0x00BF, 0x01EA, 0x0152, 0x001C, 0x0FF8, + 0x0FF1, 0x00B9, 0x01E8, 0x0157, 0x001F, 0x0FF8, + 0x0FF0, 0x00B4, 0x01E6, 0x015D, 0x0022, 0x0FF7, + 0x0FF0, 0x00AE, 0x01E4, 0x0163, 0x0025, 0x0FF6, + 0x0FEF, 0x00A9, 0x01E2, 0x0168, 0x0028, 0x0FF6, + 0x0FEF, 0x00A4, 0x01DF, 0x016E, 0x002B, 0x0FF5, + 0x0FEF, 0x009F, 0x01DD, 0x0172, 0x002E, 0x0FF5, + 0x0FEE, 0x009A, 0x01DA, 0x0178, 0x0032, 0x0FF4, + 0x0FEE, 0x0094, 0x01D8, 0x017E, 0x0035, 0x0FF3, + 0x0FEE, 0x008F, 0x01D5, 0x0182, 0x0039, 0x0FF3, + 0x0FEE, 0x008A, 0x01D2, 0x0188, 0x003C, 0x0FF2, + 0x0FEE, 0x0085, 0x01CF, 0x018C, 0x0040, 0x0FF2, + 0x0FEE, 0x0081, 0x01CB, 0x0191, 0x0044, 0x0FF1, + 0x0FEE, 0x007C, 0x01C8, 0x0196, 0x0047, 0x0FF1, + 0x0FEE, 0x0077, 0x01C4, 0x019C, 0x004B, 0x0FF0, + 0x0FEE, 0x0072, 0x01C1, 0x01A0, 0x004F, 0x0FF0, + 0x0FEE, 0x006E, 0x01BD, 0x01A4, 0x0053, 0x0FF0, + 0x0FEE, 0x0069, 0x01B9, 0x01A9, 0x0058, 0x0FEF, + 0x0FEE, 0x0065, 0x01B5, 0x01AD, 0x005C, 0x0FEF, + 0x0FEF, 0x0060, 0x01B1, 0x01B1, 0x0060, 0x0FEF, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 02-Apr-2024 +// <coeffDescrip> 6t_64p_LanczosEd_p_0.6_p_10qb_ +// <num_taps> 6 +// <num_phases> 64 +// <scale_ratio> input/output = 0.600000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_60[198] = { + 0x0FD9, 0x00FB, 0x0258, 0x00FB, 0x0FD9, 0x0000, + 0x0FD9, 0x00F3, 0x0258, 0x0102, 0x0FDA, 0x0000, + 0x0FD8, 0x00EB, 0x0258, 0x010B, 0x0FDB, 0x0FFF, + 0x0FD8, 0x00E3, 0x0258, 0x0112, 0x0FDC, 0x0FFF, + 0x0FD8, 0x00DC, 0x0257, 0x011B, 0x0FDC, 0x0FFE, + 0x0FD7, 0x00D4, 0x0256, 0x0123, 0x0FDE, 0x0FFE, + 0x0FD7, 0x00CD, 0x0255, 0x012B, 0x0FDF, 0x0FFD, + 0x0FD7, 0x00C5, 0x0254, 0x0133, 0x0FE0, 0x0FFD, + 0x0FD7, 0x00BE, 0x0252, 0x013C, 0x0FE1, 0x0FFC, + 0x0FD7, 0x00B6, 0x0251, 0x0143, 0x0FE3, 0x0FFC, + 0x0FD8, 0x00AF, 0x024F, 0x014B, 0x0FE4, 0x0FFB, + 0x0FD8, 0x00A8, 0x024C, 0x0154, 0x0FE6, 0x0FFA, + 0x0FD8, 0x00A1, 0x024A, 0x015B, 0x0FE8, 0x0FFA, + 0x0FD9, 0x009A, 0x0247, 0x0163, 0x0FEA, 0x0FF9, + 0x0FD9, 0x0093, 0x0244, 0x016C, 0x0FEC, 0x0FF8, + 0x0FD9, 0x008C, 0x0241, 0x0174, 0x0FEF, 0x0FF7, + 0x0FDA, 0x0085, 0x023E, 0x017B, 0x0FF1, 0x0FF7, + 0x0FDB, 0x007F, 0x023A, 0x0183, 0x0FF3, 0x0FF6, + 0x0FDB, 0x0078, 0x0237, 0x018B, 0x0FF6, 0x0FF5, + 0x0FDC, 0x0072, 0x0233, 0x0192, 0x0FF9, 0x0FF4, + 0x0FDD, 0x006C, 0x022F, 0x0199, 0x0FFC, 0x0FF3, + 0x0FDD, 0x0065, 0x022A, 0x01A3, 0x0FFF, 0x0FF2, + 0x0FDE, 0x005F, 0x0226, 0x01AA, 0x0002, 0x0FF1, + 0x0FDF, 0x005A, 0x0221, 0x01B0, 0x0006, 0x0FF0, + 0x0FE0, 0x0054, 0x021C, 0x01B7, 0x0009, 0x0FF0, + 0x0FE1, 0x004E, 0x0217, 0x01BE, 0x000D, 0x0FEF, + 0x0FE2, 0x0048, 0x0212, 0x01C6, 0x0010, 0x0FEE, + 0x0FE3, 0x0043, 0x020C, 0x01CD, 0x0014, 0x0FED, + 0x0FE4, 0x003E, 0x0207, 0x01D3, 0x0018, 0x0FEC, + 0x0FE5, 0x0039, 0x0200, 0x01DA, 0x001D, 0x0FEB, + 0x0FE6, 0x0034, 0x01FA, 0x01E1, 0x0021, 0x0FEA, + 0x0FE7, 0x002F, 0x01F5, 0x01E7, 0x0025, 0x0FE9, + 0x0FE8, 0x002A, 0x01EE, 0x01EE, 0x002A, 0x0FE8, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 02-Apr-2024 +// <coeffDescrip> 6t_64p_LanczosEd_p_0.7_p_10qb_ +// <num_taps> 6 +// <num_phases> 64 +// <scale_ratio> input/output = 0.700000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_70[198] = { + 0x0FC0, 0x00DA, 0x02CC, 0x00DA, 0x0FC0, 0x0000, + 0x0FC1, 0x00D0, 0x02CC, 0x00E4, 0x0FBF, 0x0000, + 0x0FC2, 0x00C6, 0x02CB, 0x00EF, 0x0FBE, 0x0000, + 0x0FC3, 0x00BC, 0x02CA, 0x00F9, 0x0FBE, 0x0000, + 0x0FC4, 0x00B2, 0x02C9, 0x0104, 0x0FBD, 0x0000, + 0x0FC5, 0x00A8, 0x02C7, 0x010F, 0x0FBD, 0x0000, + 0x0FC7, 0x009F, 0x02C5, 0x0119, 0x0FBC, 0x0000, + 0x0FC8, 0x0095, 0x02C3, 0x0124, 0x0FBC, 0x0000, + 0x0FC9, 0x008C, 0x02C0, 0x012F, 0x0FBC, 0x0000, + 0x0FCB, 0x0083, 0x02BD, 0x0139, 0x0FBC, 0x0000, + 0x0FCC, 0x007A, 0x02BA, 0x0144, 0x0FBC, 0x0000, + 0x0FCE, 0x0072, 0x02B6, 0x014D, 0x0FBD, 0x0000, + 0x0FD0, 0x0069, 0x02B2, 0x0159, 0x0FBD, 0x0FFF, + 0x0FD1, 0x0061, 0x02AD, 0x0164, 0x0FBE, 0x0FFF, + 0x0FD3, 0x0059, 0x02A9, 0x016E, 0x0FBF, 0x0FFE, + 0x0FD4, 0x0051, 0x02A4, 0x017A, 0x0FBF, 0x0FFE, + 0x0FD6, 0x0049, 0x029E, 0x0184, 0x0FC1, 0x0FFE, + 0x0FD8, 0x0042, 0x0299, 0x018E, 0x0FC2, 0x0FFD, + 0x0FD9, 0x003A, 0x0293, 0x019B, 0x0FC3, 0x0FFC, + 0x0FDB, 0x0033, 0x028D, 0x01A4, 0x0FC5, 0x0FFC, + 0x0FDC, 0x002D, 0x0286, 0x01AF, 0x0FC7, 0x0FFB, + 0x0FDE, 0x0026, 0x0280, 0x01BA, 0x0FC8, 0x0FFA, + 0x0FE0, 0x001F, 0x0279, 0x01C4, 0x0FCB, 0x0FF9, + 0x0FE1, 0x0019, 0x0272, 0x01CE, 0x0FCD, 0x0FF9, + 0x0FE3, 0x0013, 0x026A, 0x01D9, 0x0FCF, 0x0FF8, + 0x0FE4, 0x000D, 0x0263, 0x01E3, 0x0FD2, 0x0FF7, + 0x0FE6, 0x0008, 0x025B, 0x01EC, 0x0FD5, 0x0FF6, + 0x0FE7, 0x0002, 0x0253, 0x01F7, 0x0FD8, 0x0FF5, + 0x0FE9, 0x0FFD, 0x024A, 0x0202, 0x0FDB, 0x0FF3, + 0x0FEA, 0x0FF8, 0x0242, 0x020B, 0x0FDF, 0x0FF2, + 0x0FEC, 0x0FF3, 0x0239, 0x0215, 0x0FE2, 0x0FF1, + 0x0FED, 0x0FEF, 0x0230, 0x021E, 0x0FE6, 0x0FF0, + 0x0FEF, 0x0FEB, 0x0226, 0x0226, 0x0FEB, 0x0FEF, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 02-Apr-2024 +// <coeffDescrip> 6t_64p_LanczosEd_p_0.8_p_10qb_ +// <num_taps> 6 +// <num_phases> 64 +// <scale_ratio> input/output = 0.800000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_80[198] = { + 0x0FBF, 0x00A1, 0x0340, 0x00A1, 0x0FBF, 0x0000, + 0x0FC1, 0x0095, 0x0340, 0x00AD, 0x0FBC, 0x0001, + 0x0FC4, 0x0089, 0x033E, 0x00BA, 0x0FBA, 0x0001, + 0x0FC6, 0x007D, 0x033D, 0x00C6, 0x0FB8, 0x0002, + 0x0FC9, 0x0072, 0x033A, 0x00D3, 0x0FB6, 0x0002, + 0x0FCC, 0x0067, 0x0338, 0x00DF, 0x0FB3, 0x0003, + 0x0FCE, 0x005C, 0x0334, 0x00EE, 0x0FB1, 0x0003, + 0x0FD1, 0x0051, 0x0331, 0x00FA, 0x0FAF, 0x0004, + 0x0FD3, 0x0047, 0x032D, 0x0108, 0x0FAD, 0x0004, + 0x0FD6, 0x003D, 0x0328, 0x0116, 0x0FAB, 0x0004, + 0x0FD8, 0x0033, 0x0323, 0x0123, 0x0FAA, 0x0005, + 0x0FDB, 0x002A, 0x031D, 0x0131, 0x0FA8, 0x0005, + 0x0FDD, 0x0021, 0x0317, 0x013F, 0x0FA7, 0x0005, + 0x0FDF, 0x0018, 0x0311, 0x014D, 0x0FA5, 0x0006, + 0x0FE2, 0x0010, 0x030A, 0x015A, 0x0FA4, 0x0006, + 0x0FE4, 0x0008, 0x0302, 0x0169, 0x0FA3, 0x0006, + 0x0FE6, 0x0000, 0x02FB, 0x0177, 0x0FA2, 0x0006, + 0x0FE8, 0x0FF9, 0x02F3, 0x0185, 0x0FA1, 0x0006, + 0x0FEB, 0x0FF1, 0x02EA, 0x0193, 0x0FA1, 0x0006, + 0x0FED, 0x0FEB, 0x02E1, 0x01A1, 0x0FA0, 0x0006, + 0x0FEE, 0x0FE4, 0x02D8, 0x01B0, 0x0FA0, 0x0006, + 0x0FF0, 0x0FDE, 0x02CE, 0x01BE, 0x0FA0, 0x0006, + 0x0FF2, 0x0FD8, 0x02C5, 0x01CB, 0x0FA0, 0x0006, + 0x0FF4, 0x0FD3, 0x02BA, 0x01D8, 0x0FA1, 0x0006, + 0x0FF6, 0x0FCD, 0x02B0, 0x01E7, 0x0FA1, 0x0005, + 0x0FF7, 0x0FC8, 0x02A5, 0x01F5, 0x0FA2, 0x0005, + 0x0FF9, 0x0FC4, 0x029A, 0x0202, 0x0FA3, 0x0004, + 0x0FFA, 0x0FC0, 0x028E, 0x0210, 0x0FA4, 0x0004, + 0x0FFB, 0x0FBC, 0x0283, 0x021D, 0x0FA6, 0x0003, + 0x0FFD, 0x0FB8, 0x0276, 0x022A, 0x0FA8, 0x0003, + 0x0FFE, 0x0FB4, 0x026B, 0x0237, 0x0FAA, 0x0002, + 0x0FFF, 0x0FB1, 0x025E, 0x0245, 0x0FAC, 0x0001, + 0x0000, 0x0FAE, 0x0252, 0x0252, 0x0FAE, 0x0000, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 02-Apr-2024 +// <coeffDescrip> 6t_64p_LanczosEd_p_0.9_p_10qb_ +// <num_taps> 6 +// <num_phases> 64 +// <scale_ratio> input/output = 0.900000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_90[198] = { + 0x0FD8, 0x0055, 0x03A7, 0x0054, 0x0FD8, 0x0000, + 0x0FDB, 0x0047, 0x03A7, 0x0063, 0x0FD4, 0x0000, + 0x0FDF, 0x003B, 0x03A5, 0x006F, 0x0FD1, 0x0001, + 0x0FE2, 0x002E, 0x03A3, 0x007E, 0x0FCD, 0x0002, + 0x0FE5, 0x0022, 0x03A0, 0x008D, 0x0FCA, 0x0002, + 0x0FE8, 0x0017, 0x039D, 0x009B, 0x0FC6, 0x0003, + 0x0FEB, 0x000C, 0x0398, 0x00AC, 0x0FC2, 0x0003, + 0x0FEE, 0x0001, 0x0394, 0x00BA, 0x0FBF, 0x0004, + 0x0FF1, 0x0FF7, 0x038E, 0x00CA, 0x0FBB, 0x0005, + 0x0FF4, 0x0FED, 0x0388, 0x00DA, 0x0FB8, 0x0005, + 0x0FF6, 0x0FE4, 0x0381, 0x00EB, 0x0FB4, 0x0006, + 0x0FF9, 0x0FDB, 0x037A, 0x00FA, 0x0FB1, 0x0007, + 0x0FFB, 0x0FD3, 0x0372, 0x010B, 0x0FAD, 0x0008, + 0x0FFD, 0x0FCB, 0x0369, 0x011D, 0x0FAA, 0x0008, + 0x0000, 0x0FC3, 0x0360, 0x012E, 0x0FA6, 0x0009, + 0x0002, 0x0FBC, 0x0356, 0x013F, 0x0FA3, 0x000A, + 0x0003, 0x0FB6, 0x034C, 0x0150, 0x0FA0, 0x000B, + 0x0005, 0x0FB0, 0x0341, 0x0162, 0x0F9D, 0x000B, + 0x0007, 0x0FAA, 0x0336, 0x0173, 0x0F9A, 0x000C, + 0x0008, 0x0FA5, 0x032A, 0x0185, 0x0F97, 0x000D, + 0x000A, 0x0FA0, 0x031E, 0x0197, 0x0F94, 0x000D, + 0x000B, 0x0F9B, 0x0311, 0x01A9, 0x0F92, 0x000E, + 0x000C, 0x0F97, 0x0303, 0x01BC, 0x0F8F, 0x000F, + 0x000D, 0x0F94, 0x02F6, 0x01CD, 0x0F8D, 0x000F, + 0x000E, 0x0F91, 0x02E8, 0x01DE, 0x0F8B, 0x0010, + 0x000F, 0x0F8E, 0x02D9, 0x01F1, 0x0F89, 0x0010, + 0x0010, 0x0F8B, 0x02CA, 0x0202, 0x0F88, 0x0011, + 0x0010, 0x0F89, 0x02BB, 0x0214, 0x0F87, 0x0011, + 0x0011, 0x0F87, 0x02AB, 0x0226, 0x0F86, 0x0011, + 0x0011, 0x0F86, 0x029C, 0x0236, 0x0F85, 0x0012, + 0x0011, 0x0F85, 0x028B, 0x0249, 0x0F84, 0x0012, + 0x0012, 0x0F84, 0x027B, 0x0259, 0x0F84, 0x0012, + 0x0012, 0x0F84, 0x026A, 0x026A, 0x0F84, 0x0012, +}; + +//======================================================== +// <using> gen_scaler_coeffs_cnf_file.m +// <using> make_test_script.m +// <date> 02-Apr-2024 +// <coeffDescrip> 6t_64p_LanczosEd_p_1_p_10qb_ +// <num_taps> 6 +// <num_phases> 64 +// <scale_ratio> input/output = 1.000000000000 +// <CoefType> LanczosEd +// <CoefQuant> S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_1_00[198] = { + 0x0000, 0x0000, 0x0400, 0x0000, 0x0000, 0x0000, + 0x0003, 0x0FF3, 0x0400, 0x000D, 0x0FFD, 0x0000, + 0x0006, 0x0FE7, 0x03FE, 0x001C, 0x0FF9, 0x0000, + 0x0009, 0x0FDB, 0x03FC, 0x002B, 0x0FF5, 0x0000, + 0x000C, 0x0FD0, 0x03F9, 0x003A, 0x0FF1, 0x0000, + 0x000E, 0x0FC5, 0x03F5, 0x004A, 0x0FED, 0x0001, + 0x0011, 0x0FBB, 0x03F0, 0x005A, 0x0FE9, 0x0001, + 0x0013, 0x0FB2, 0x03EB, 0x006A, 0x0FE5, 0x0001, + 0x0015, 0x0FA9, 0x03E4, 0x007B, 0x0FE1, 0x0002, + 0x0017, 0x0FA1, 0x03DD, 0x008D, 0x0FDC, 0x0002, + 0x0018, 0x0F99, 0x03D4, 0x00A0, 0x0FD8, 0x0003, + 0x001A, 0x0F92, 0x03CB, 0x00B2, 0x0FD3, 0x0004, + 0x001B, 0x0F8C, 0x03C1, 0x00C6, 0x0FCE, 0x0004, + 0x001C, 0x0F86, 0x03B7, 0x00D9, 0x0FC9, 0x0005, + 0x001D, 0x0F80, 0x03AB, 0x00EE, 0x0FC4, 0x0006, + 0x001E, 0x0F7C, 0x039F, 0x0101, 0x0FBF, 0x0007, + 0x001F, 0x0F78, 0x0392, 0x0115, 0x0FBA, 0x0008, + 0x001F, 0x0F74, 0x0385, 0x012B, 0x0FB5, 0x0008, + 0x0020, 0x0F71, 0x0376, 0x0140, 0x0FB0, 0x0009, + 0x0020, 0x0F6E, 0x0367, 0x0155, 0x0FAB, 0x000B, + 0x0020, 0x0F6C, 0x0357, 0x016B, 0x0FA6, 0x000C, + 0x0020, 0x0F6A, 0x0347, 0x0180, 0x0FA2, 0x000D, + 0x0020, 0x0F69, 0x0336, 0x0196, 0x0F9D, 0x000E, + 0x0020, 0x0F69, 0x0325, 0x01AB, 0x0F98, 0x000F, + 0x001F, 0x0F68, 0x0313, 0x01C3, 0x0F93, 0x0010, + 0x001F, 0x0F69, 0x0300, 0x01D8, 0x0F8F, 0x0011, + 0x001E, 0x0F69, 0x02ED, 0x01EF, 0x0F8B, 0x0012, + 0x001D, 0x0F6A, 0x02D9, 0x0205, 0x0F87, 0x0014, + 0x001D, 0x0F6C, 0x02C5, 0x021A, 0x0F83, 0x0015, + 0x001C, 0x0F6E, 0x02B1, 0x0230, 0x0F7F, 0x0016, + 0x001B, 0x0F70, 0x029C, 0x0247, 0x0F7B, 0x0017, + 0x001A, 0x0F72, 0x0287, 0x025D, 0x0F78, 0x0018, + 0x0019, 0x0F75, 0x0272, 0x0272, 0x0F75, 0x0019, +}; + +/* Converted scaler coeff tables from S1.10 to S1.12 */ +static uint16_t easf_filter_3tap_64p_ratio_0_30_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_40_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_50_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_60_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_70_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_80_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_90_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_1_00_s1_12[99]; +static uint16_t easf_filter_4tap_64p_ratio_0_30_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_40_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_50_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_60_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_70_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_80_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_90_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_1_00_s1_12[132]; +static uint16_t easf_filter_6tap_64p_ratio_0_30_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_40_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_50_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_60_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_70_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_80_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_90_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_1_00_s1_12[198]; + +struct scale_ratio_to_reg_value_lookup easf_v_bf3_mode_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 0x0000}, + {-1, -1, 0x0002}, +}; + +struct scale_ratio_to_reg_value_lookup easf_h_bf3_mode_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 0x0000}, + {-1, -1, 0x0002}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_6tap_lookup[] = { + {3, 10, 0x4100}, + {4, 10, 0x4100}, + {5, 10, 0x4100}, + {6, 10, 0x4100}, + {7, 10, 0x4100}, + {8, 10, 0x4100}, + {9, 10, 0x4100}, + {1, 1, 0x4100}, + {-1, -1, 0x4100}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_6tap_lookup[] = { + {3, 10, 0x4000}, + {4, 10, 0x4000}, + {5, 10, 0x4000}, + {6, 10, 0x4000}, + {7, 10, 0x4000}, + {8, 10, 0x4000}, + {9, 10, 0x4000}, + {1, 1, 0x4000}, + {-1, -1, 0x4000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring6_6tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x251F}, + {5, 10, 0x291F}, + {6, 10, 0xA51F}, + {7, 10, 0xA51F}, + {8, 10, 0xAA66}, + {9, 10, 0xA51F}, + {1, 1, 0xA640}, + {-1, -1, 0xA640}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring4_6tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x9600}, + {5, 10, 0xA460}, + {6, 10, 0xA8E0}, + {7, 10, 0xAC00}, + {8, 10, 0xAD20}, + {9, 10, 0xAFC0}, + {1, 1, 0xB058}, + {-1, -1, 0xB058}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_4tap_lookup[] = { + {3, 10, 0x4100}, + {4, 10, 0x4100}, + {5, 10, 0x4100}, + {6, 10, 0x4100}, + {7, 10, 0x4100}, + {8, 10, 0x4100}, + {9, 10, 0x4100}, + {1, 1, 0x4100}, + {-1, -1, 0x4100}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_4tap_lookup[] = { + {3, 10, 0x4000}, + {4, 10, 0x4000}, + {5, 10, 0x4000}, + {6, 10, 0x4000}, + {7, 10, 0x4000}, + {8, 10, 0x4000}, + {9, 10, 0x4000}, + {1, 1, 0x4000}, + {-1, -1, 0x4000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring6_4tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 0x0000}, + {-1, -1, 0x0000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring4_4tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x9900}, + {7, 10, 0xA100}, + {8, 10, 0xA8C0}, + {9, 10, 0xAB20}, + {1, 1, 0xAC00}, + {-1, -1, 0xAC00}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_uptilt_offset_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x4100}, + {9, 10, 0x9F00}, + {1, 1, 0xA4C0}, + {-1, -1, 0xA8D8}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt_maxval_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x4000}, + {9, 10, 0x24FE}, + {1, 1, 0x2D64}, + {-1, -1, 0x3ADB}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_slope_lookup[] = { + {3, 10, 0x3800}, + {4, 10, 0x3800}, + {5, 10, 0x3800}, + {6, 10, 0x3800}, + {7, 10, 0x3800}, + {8, 10, 0x3886}, + {9, 10, 0x3940}, + {1, 1, 0x3A4E}, + {-1, -1, 0x3B66}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt1_slope_lookup[] = { + {3, 10, 0x3800}, + {4, 10, 0x3800}, + {5, 10, 0x3800}, + {6, 10, 0x3800}, + {7, 10, 0x3800}, + {8, 10, 0x36F4}, + {9, 10, 0x359C}, + {1, 1, 0x3360}, + {-1, -1, 0x2F20}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_slope_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x359C}, + {1, 1, 0x31F0}, + {-1, -1, 0x1F00}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_offset_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x9F00}, + {1, 1, 0xA400}, + {-1, -1, 0x9E00}, +}; + +void spl_init_easf_filter_coeffs(void) +{ + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_30, + easf_filter_3tap_64p_ratio_0_30_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_40, + easf_filter_3tap_64p_ratio_0_40_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_50, + easf_filter_3tap_64p_ratio_0_50_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_60, + easf_filter_3tap_64p_ratio_0_60_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_70, + easf_filter_3tap_64p_ratio_0_70_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_80, + easf_filter_3tap_64p_ratio_0_80_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_90, + easf_filter_3tap_64p_ratio_0_90_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_1_00, + easf_filter_3tap_64p_ratio_1_00_s1_12, 3); + + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_30, + easf_filter_4tap_64p_ratio_0_30_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_40, + easf_filter_4tap_64p_ratio_0_40_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_50, + easf_filter_4tap_64p_ratio_0_50_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_60, + easf_filter_4tap_64p_ratio_0_60_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_70, + easf_filter_4tap_64p_ratio_0_70_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_80, + easf_filter_4tap_64p_ratio_0_80_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_90, + easf_filter_4tap_64p_ratio_0_90_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_1_00, + easf_filter_4tap_64p_ratio_1_00_s1_12, 4); + + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_30, + easf_filter_6tap_64p_ratio_0_30_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_40, + easf_filter_6tap_64p_ratio_0_40_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_50, + easf_filter_6tap_64p_ratio_0_50_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_60, + easf_filter_6tap_64p_ratio_0_60_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_70, + easf_filter_6tap_64p_ratio_0_70_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_80, + easf_filter_6tap_64p_ratio_0_80_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_90, + easf_filter_6tap_64p_ratio_0_90_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_1_00, + easf_filter_6tap_64p_ratio_1_00_s1_12, 6); +} + +uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio) +{ + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) + return easf_filter_3tap_64p_ratio_0_30_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) + return easf_filter_3tap_64p_ratio_0_40_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) + return easf_filter_3tap_64p_ratio_0_50_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) + return easf_filter_3tap_64p_ratio_0_60_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) + return easf_filter_3tap_64p_ratio_0_70_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) + return easf_filter_3tap_64p_ratio_0_80_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) + return easf_filter_3tap_64p_ratio_0_90_s1_12; + else + return easf_filter_3tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio) +{ + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) + return easf_filter_4tap_64p_ratio_0_30_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) + return easf_filter_4tap_64p_ratio_0_40_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) + return easf_filter_4tap_64p_ratio_0_50_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) + return easf_filter_4tap_64p_ratio_0_60_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) + return easf_filter_4tap_64p_ratio_0_70_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) + return easf_filter_4tap_64p_ratio_0_80_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) + return easf_filter_4tap_64p_ratio_0_90_s1_12; + else + return easf_filter_4tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio) +{ + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) + return easf_filter_6tap_64p_ratio_0_30_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) + return easf_filter_6tap_64p_ratio_0_40_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) + return easf_filter_6tap_64p_ratio_0_50_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) + return easf_filter_6tap_64p_ratio_0_60_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) + return easf_filter_6tap_64p_ratio_0_70_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) + return easf_filter_6tap_64p_ratio_0_80_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) + return easf_filter_6tap_64p_ratio_0_90_s1_12; + else + return easf_filter_6tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) +{ + if (taps == 6) + return spl_get_easf_filter_6tap_64p(ratio); + else if (taps == 4) + return spl_get_easf_filter_4tap_64p(ratio); + else if (taps == 3) + return spl_get_easf_filter_3tap_64p(ratio); + else { + /* should never happen, bug */ + SPL_BREAK_TO_DEBUGGER(); + return NULL; + } +} + +void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data, bool enable_easf_v, + bool enable_easf_h) +{ + /* + * Old coefficients calculated scaling ratio = input / output + * New coefficients are calculated based on = output / input + */ + if (enable_easf_h) { + dscl_prog_data->filter_h = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.h_taps, data->recip_ratios.horz); + + dscl_prog_data->filter_h_c = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.h_taps_c, data->recip_ratios.horz_c); + } else { + dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps, data->ratios.horz); + + dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps_c, data->ratios.horz_c); + } + if (enable_easf_v) { + dscl_prog_data->filter_v = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.v_taps, data->recip_ratios.vert); + + dscl_prog_data->filter_v_c = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.v_taps_c, data->recip_ratios.vert_c); + } else { + dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps, data->ratios.vert); + + dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps_c, data->ratios.vert_c); + } +} + +static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct spl_fixed31_32 ratio, + struct scale_ratio_to_reg_value_lookup *lookup_table_base_ptr, + unsigned int num_entries) +{ + unsigned int count = 0; + uint32_t value = 0; + struct scale_ratio_to_reg_value_lookup *lookup_table_index_ptr; + + lookup_table_index_ptr = (lookup_table_base_ptr + num_entries - 1); + value = lookup_table_index_ptr->reg_value; + + while (count < num_entries) { + + lookup_table_index_ptr = (lookup_table_base_ptr + count); + if (lookup_table_index_ptr->numer < 0) + break; + + if (ratio.value < spl_fixpt_from_fraction( + lookup_table_index_ptr->numer, + lookup_table_index_ptr->denom).value) { + value = lookup_table_index_ptr->reg_value; + break; + } + + count++; + } + return value; +} +uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries = sizeof(easf_v_bf3_mode_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_v_bf3_mode_lookup, num_entries); + return value; +} +uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries = sizeof(easf_h_bf3_mode_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_h_bf3_mode_lookup, num_entries); + return value; +} +uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_reducer_gain6_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain6_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_reducer_gain6_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain6_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_reducer_gain4_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain4_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_reducer_gain4_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain4_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_gain_ring6_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring6_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_gain_ring6_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring6_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_gain_ring4_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring4_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_gain_ring4_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring4_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_dntilt_uptilt_offset_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_dntilt_uptilt_offset_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt_maxval_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt_maxval_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_dntilt_slope_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_dntilt_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt1_slope_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt1_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt2_slope_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt2_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt2_offset_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt2_offset_lookup, num_entries); + } else + value = 0; + return value; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h new file mode 100644 index 000000000000..8bb2b8108e38 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ + +#ifndef __DC_SPL_SCL_EASF_FILTERS_H__ +#define __DC_SPL_SCL_EASF_FILTERS_H__ + +#include "dc_spl_types.h" + +struct scale_ratio_to_reg_value_lookup { + int numer; + int denom; + const uint32_t reg_value; +}; + +void spl_init_easf_filter_coeffs(void); +uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); +void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data, bool enable_easf_v, + bool enable_easf_h); + +uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio); +uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio); +uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio); + +#endif /* __DC_SPL_SCL_EASF_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c index e2baaf584139..b02c7b0b262b 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c @@ -2,6 +2,7 @@ // // Copyright 2024 Advanced Micro Devices, Inc. +#include "spl_debug.h" #include "dc_spl_scl_filters.h" //========================================= // <num_taps> = 2 @@ -1317,97 +1318,97 @@ static const uint16_t filter_8tap_64p_183[264] = { 0x3FD4, 0x3F84, 0x0214, 0x0694, 0x0694, 0x0214, 0x3F84, 0x3FD4 }; -const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_3tap_16p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_3tap_16p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_3tap_16p_149; else return filter_3tap_16p_183; } -const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_3tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_3tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_3tap_64p_149; else return filter_3tap_64p_183; } -const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_4tap_16p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_4tap_16p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_4tap_16p_149; else return filter_4tap_16p_183; } -const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_4tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_4tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_4tap_64p_149; else return filter_4tap_64p_183; } -const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_5tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_5tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_5tap_64p_149; else return filter_5tap_64p_183; } -const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_6tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_6tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_6tap_64p_149; else return filter_6tap_64p_183; } -const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_7tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_7tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_7tap_64p_149; else return filter_7tap_64p_183; } -const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_8tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_8tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_8tap_64p_149; else return filter_8tap_64p_183; @@ -1422,3 +1423,29 @@ const uint16_t *spl_get_filter_2tap_64p(void) { return filter_2tap_64p; } + +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) +{ + if (taps == 8) + return spl_get_filter_8tap_64p(ratio); + else if (taps == 7) + return spl_get_filter_7tap_64p(ratio); + else if (taps == 6) + return spl_get_filter_6tap_64p(ratio); + else if (taps == 5) + return spl_get_filter_5tap_64p(ratio); + else if (taps == 4) + return spl_get_filter_4tap_64p(ratio); + else if (taps == 3) + return spl_get_filter_3tap_64p(ratio); + else if (taps == 2) + return spl_get_filter_2tap_64p(); + else if (taps == 1) + return NULL; + else { + /* should never happen, bug */ + SPL_BREAK_TO_DEBUGGER(); + return NULL; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h index 6d96aca53b24..48202bc4f81e 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h @@ -7,53 +7,16 @@ #include "dc_spl_types.h" -const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio); const uint16_t *spl_get_filter_2tap_16p(void); const uint16_t *spl_get_filter_2tap_64p(void); -const uint16_t *spl_get_filter_3tap_16p_upscale(void); -const uint16_t *spl_get_filter_3tap_16p_116(void); -const uint16_t *spl_get_filter_3tap_16p_149(void); -const uint16_t *spl_get_filter_3tap_16p_183(void); +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_16p_upscale(void); -const uint16_t *spl_get_filter_4tap_16p_116(void); -const uint16_t *spl_get_filter_4tap_16p_149(void); -const uint16_t *spl_get_filter_4tap_16p_183(void); - -const uint16_t *spl_get_filter_3tap_64p_upscale(void); -const uint16_t *spl_get_filter_3tap_64p_116(void); -const uint16_t *spl_get_filter_3tap_64p_149(void); -const uint16_t *spl_get_filter_3tap_64p_183(void); - -const uint16_t *spl_get_filter_4tap_64p_upscale(void); -const uint16_t *spl_get_filter_4tap_64p_116(void); -const uint16_t *spl_get_filter_4tap_64p_149(void); -const uint16_t *spl_get_filter_4tap_64p_183(void); - -const uint16_t *spl_get_filter_5tap_64p_upscale(void); -const uint16_t *spl_get_filter_5tap_64p_116(void); -const uint16_t *spl_get_filter_5tap_64p_149(void); -const uint16_t *spl_get_filter_5tap_64p_183(void); - -const uint16_t *spl_get_filter_6tap_64p_upscale(void); -const uint16_t *spl_get_filter_6tap_64p_116(void); -const uint16_t *spl_get_filter_6tap_64p_149(void); -const uint16_t *spl_get_filter_6tap_64p_183(void); - -const uint16_t *spl_get_filter_7tap_64p_upscale(void); -const uint16_t *spl_get_filter_7tap_64p_116(void); -const uint16_t *spl_get_filter_7tap_64p_149(void); -const uint16_t *spl_get_filter_7tap_64p_183(void); - -const uint16_t *spl_get_filter_8tap_64p_upscale(void); -const uint16_t *spl_get_filter_8tap_64p_116(void); -const uint16_t *spl_get_filter_8tap_64p_149(void); -const uint16_t *spl_get_filter_8tap_64p_183(void); #endif /* __DC_SPL_SCL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c deleted file mode 100644 index bb0e1b80ec3c..000000000000 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2012-16 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 36d10b0f2eed..85b19ebe2c57 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -2,14 +2,16 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include "os_types.h" // swap -#ifndef ASSERT -#define ASSERT(_bool) ((void *)0) -#endif -#include "include/fixed31_32.h" // fixed31_32 and related functions #ifndef __DC_SPL_TYPES_H__ #define __DC_SPL_TYPES_H__ +#include "spl_os_types.h" // swap +#ifndef SPL_ASSERT +#define SPL_ASSERT(_bool) ((void *)0) +#endif +#include "spl_fixpt31_32.h" // fixed31_32 and related functions +#include "spl_custom_float.h" // custom float and related functions + struct spl_size { uint32_t width; uint32_t height; @@ -22,16 +24,16 @@ struct spl_rect { }; struct spl_ratios { - struct fixed31_32 horz; - struct fixed31_32 vert; - struct fixed31_32 horz_c; - struct fixed31_32 vert_c; + struct spl_fixed31_32 horz; + struct spl_fixed31_32 vert; + struct spl_fixed31_32 horz_c; + struct spl_fixed31_32 vert_c; }; struct spl_inits { - struct fixed31_32 h; - struct fixed31_32 h_c; - struct fixed31_32 v; - struct fixed31_32 v_c; + struct spl_fixed31_32 h; + struct spl_fixed31_32 h_c; + struct spl_fixed31_32 v; + struct spl_fixed31_32 v_c; }; struct spl_taps { @@ -64,6 +66,8 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_420BPP10, /*end of pixel format definition*/ SPL_PIXEL_FORMAT_INVALID, + SPL_PIXEL_FORMAT_422BPP8, + SPL_PIXEL_FORMAT_422BPP10, SPL_PIXEL_FORMAT_GRPH_BEGIN = SPL_PIXEL_FORMAT_INDEX8, SPL_PIXEL_FORMAT_GRPH_END = SPL_PIXEL_FORMAT_FP16, SPL_PIXEL_FORMAT_VIDEO_BEGIN = SPL_PIXEL_FORMAT_420BPP8, @@ -135,6 +139,7 @@ struct spl_scaler_data { struct spl_rect viewport_c; struct spl_rect recout; struct spl_ratios ratios; + struct spl_ratios recip_ratios; struct spl_inits inits; }; @@ -402,13 +407,19 @@ struct dscl_prog_data { /* blur and scale filter */ const uint16_t *filter_blur_scale_v; const uint16_t *filter_blur_scale_h; + int sharpness_level; /* Track sharpness level */ }; /* SPL input and output definitions */ -// SPL outputs struct -struct spl_out { +// SPL scratch struct +struct spl_scratch { // Pack all SPL outputs in scl_data struct spl_scaler_data scl_data; +}; + +/* SPL input and output definitions */ +// SPL outputs struct +struct spl_out { // Pack all output need to program hw registers struct dscl_prog_data *dscl_prog_data; }; @@ -450,14 +461,26 @@ struct basic_out { bool alpha_en; bool use_two_pixels_per_container; }; -enum explicit_sharpness { - SHARPNESS_LOW = 0, - SHARPNESS_MID, - SHARPNESS_HIGH -}; -struct adaptive_sharpness { +enum sharpness_setting { + SHARPNESS_HW_OFF = 0, + SHARPNESS_ZERO, + SHARPNESS_CUSTOM +}; +struct spl_sharpness_range { + int sdr_rgb_min; + int sdr_rgb_max; + int sdr_rgb_mid; + int sdr_yuv_min; + int sdr_yuv_max; + int sdr_yuv_mid; + int hdr_rgb_min; + int hdr_rgb_max; + int hdr_rgb_mid; +}; +struct adaptive_sharpness { bool enable; - enum explicit_sharpness sharpness; + int sharpness_level; + struct spl_sharpness_range sharpness_range; }; enum linear_light_scaling { // convert it in translation logic LLS_PREF_DONT_CARE = 0, @@ -491,6 +514,11 @@ struct spl_in { bool prefer_easf; bool disable_easf; struct spl_debug debug; + bool is_fullscreen; + bool is_hdr_on; + int h_active; + int v_active; + int hdr_multx100; }; // end of SPL inputs diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.c b/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.c new file mode 100644 index 000000000000..be2f34d034c5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "spl_debug.h" +#include "spl_custom_float.h" + +static bool spl_build_custom_float(struct spl_fixed31_32 value, + const struct spl_custom_float_format *format, + bool *negative, + uint32_t *mantissa, + uint32_t *exponenta) +{ + uint32_t exp_offset = (1 << (format->exponenta_bits - 1)) - 1; + + const struct spl_fixed31_32 mantissa_constant_plus_max_fraction = + spl_fixpt_from_fraction((1LL << (format->mantissa_bits + 1)) - 1, + 1LL << format->mantissa_bits); + + struct spl_fixed31_32 mantiss; + + if (spl_fixpt_eq(value, spl_fixpt_zero)) { + *negative = false; + *mantissa = 0; + *exponenta = 0; + return true; + } + + if (spl_fixpt_lt(value, spl_fixpt_zero)) { + *negative = format->sign; + value = spl_fixpt_neg(value); + } else { + *negative = false; + } + + if (spl_fixpt_lt(value, spl_fixpt_one)) { + uint32_t i = 1; + + do { + value = spl_fixpt_shl(value, 1); + ++i; + } while (spl_fixpt_lt(value, spl_fixpt_one)); + + --i; + + if (exp_offset <= i) { + *mantissa = 0; + *exponenta = 0; + return true; + } + + *exponenta = exp_offset - i; + } else if (spl_fixpt_le(mantissa_constant_plus_max_fraction, value)) { + uint32_t i = 1; + + do { + value = spl_fixpt_shr(value, 1); + ++i; + } while (spl_fixpt_lt(mantissa_constant_plus_max_fraction, value)); + + *exponenta = exp_offset + i - 1; + } else { + *exponenta = exp_offset; + } + + mantiss = spl_fixpt_sub(value, spl_fixpt_one); + + if (spl_fixpt_lt(mantiss, spl_fixpt_zero) || + spl_fixpt_lt(spl_fixpt_one, mantiss)) + mantiss = spl_fixpt_zero; + else + mantiss = spl_fixpt_shl(mantiss, format->mantissa_bits); + + *mantissa = spl_fixpt_floor(mantiss); + + return true; +} + +static bool spl_setup_custom_float(const struct spl_custom_float_format *format, + bool negative, + uint32_t mantissa, + uint32_t exponenta, + uint32_t *result) +{ + uint32_t i = 0; + uint32_t j = 0; + uint32_t value = 0; + + /* verification code: + * once calculation is ok we can remove it + */ + + const uint32_t mantissa_mask = + (1 << (format->mantissa_bits + 1)) - 1; + + const uint32_t exponenta_mask = + (1 << (format->exponenta_bits + 1)) - 1; + + if (mantissa & ~mantissa_mask) { + SPL_BREAK_TO_DEBUGGER(); + mantissa = mantissa_mask; + } + + if (exponenta & ~exponenta_mask) { + SPL_BREAK_TO_DEBUGGER(); + exponenta = exponenta_mask; + } + + /* end of verification code */ + + while (i < format->mantissa_bits) { + uint32_t mask = 1 << i; + + if (mantissa & mask) + value |= mask; + + ++i; + } + + while (j < format->exponenta_bits) { + uint32_t mask = 1 << j; + + if (exponenta & mask) + value |= mask << i; + + ++j; + } + + if (negative && format->sign) + value |= 1 << (i + j); + + *result = value; + + return true; +} + +bool spl_convert_to_custom_float_format(struct spl_fixed31_32 value, + const struct spl_custom_float_format *format, + uint32_t *result) +{ + uint32_t mantissa; + uint32_t exponenta; + bool negative; + + return spl_build_custom_float(value, format, &negative, &mantissa, &exponenta) && + spl_setup_custom_float(format, + negative, + mantissa, + exponenta, + result); +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.h b/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.h new file mode 100644 index 000000000000..cdc4e107b9de --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ + +#ifndef SPL_CUSTOM_FLOAT_H_ +#define SPL_CUSTOM_FLOAT_H_ + +#include "spl_os_types.h" +#include "spl_fixpt31_32.h" + +struct spl_custom_float_format { + uint32_t mantissa_bits; + uint32_t exponenta_bits; + bool sign; +}; + +struct spl_custom_float_value { + uint32_t mantissa; + uint32_t exponenta; + uint32_t value; + bool negative; +}; + +bool spl_convert_to_custom_float_format( + struct spl_fixed31_32 value, + const struct spl_custom_float_format *format, + uint32_t *result); + +#endif //SPL_CUSTOM_FLOAT_H_ diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h b/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h new file mode 100644 index 000000000000..5696dafd0894 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ + +#ifndef SPL_DEBUG_H +#define SPL_DEBUG_H + +#ifdef SPL_ASSERT +#undef SPL_ASSERT +#endif +#define SPL_ASSERT(b) + +#define SPL_ASSERT_CRITICAL(expr) do {if (expr)/* Do nothing */; } while (0) + +#ifdef SPL_DALMSG +#undef SPL_DALMSG +#endif +#define SPL_DALMSG(b) + +#ifdef SPL_DAL_ASSERT_MSG +#undef SPL_DAL_ASSERT_MSG +#endif +#define SPL_DAL_ASSERT_MSG(b, m) + +#endif // SPL_DEBUG_H diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c new file mode 100644 index 000000000000..a95565df5487 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "spl_fixpt31_32.h" + +static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL }; +static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL }; +static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL }; + +static inline unsigned long long abs_i64( + long long arg) +{ + if (arg > 0) + return (unsigned long long)arg; + else + return (unsigned long long)(-arg); +} + +/* + * @brief + * result = dividend / divisor + * *remainder = dividend % divisor + */ +static inline unsigned long long complete_integer_division_u64( + unsigned long long dividend, + unsigned long long divisor, + unsigned long long *remainder) +{ + unsigned long long result; + + ASSERT(divisor); + + result = spl_div64_u64_rem(dividend, divisor, remainder); + + return result; +} + + +#define FRACTIONAL_PART_MASK \ + ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) + +#define GET_INTEGER_PART(x) \ + ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + +#define GET_FRACTIONAL_PART(x) \ + (FRACTIONAL_PART_MASK & (x)) + +struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator) +{ + struct spl_fixed31_32 res; + + bool arg1_negative = numerator < 0; + bool arg2_negative = denominator < 0; + + unsigned long long arg1_value = arg1_negative ? -numerator : numerator; + unsigned long long arg2_value = arg2_negative ? -denominator : denominator; + + unsigned long long remainder; + + /* determine integer part */ + + unsigned long long res_value = complete_integer_division_u64( + arg1_value, arg2_value, &remainder); + + ASSERT(res_value <= LONG_MAX); + + /* determine fractional part */ + { + unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; + + do { + remainder <<= 1; + + res_value <<= 1; + + if (remainder >= arg2_value) { + res_value |= 1; + remainder -= arg2_value; + } + } while (--i != 0); + } + + /* round up LSB */ + { + unsigned long long summand = (remainder << 1) >= arg2_value; + + ASSERT(res_value <= LLONG_MAX - summand); + + res_value += summand; + } + + res.value = (long long)res_value; + + if (arg1_negative ^ arg2_negative) + res.value = -res.value; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + bool arg1_negative = arg1.value < 0; + bool arg2_negative = arg2.value < 0; + + unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; + unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; + + unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); + unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); + + unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); + unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); + + unsigned long long tmp; + + res.value = arg1_int * arg2_int; + + ASSERT(res.value <= (long long)LONG_MAX); + + res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; + + tmp = arg1_int * arg2_fra; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg2_int * arg1_fra; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg1_fra * arg2_fra; + + tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + + (tmp >= (unsigned long long)spl_fixpt_half.value); + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + if (arg1_negative ^ arg2_negative) + res.value = -res.value; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res; + + unsigned long long arg_value = abs_i64(arg.value); + + unsigned long long arg_int = GET_INTEGER_PART(arg_value); + + unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); + + unsigned long long tmp; + + res.value = arg_int * arg_int; + + ASSERT(res.value <= (long long)LONG_MAX); + + res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; + + tmp = arg_int * arg_fra; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg_fra * arg_fra; + + tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + + (tmp >= (unsigned long long)spl_fixpt_half.value); + + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg) +{ + /* + * @note + * Good idea to use Newton's method + */ + + ASSERT(arg.value); + + return spl_fixpt_from_fraction( + spl_fixpt_one.value, + arg.value); +} + +struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 square; + + struct spl_fixed31_32 res = spl_fixpt_one; + + int n = 27; + + struct spl_fixed31_32 arg_norm = arg; + + if (spl_fixpt_le( + spl_fixpt_two_pi, + spl_fixpt_abs(arg))) { + arg_norm = spl_fixpt_sub( + arg_norm, + spl_fixpt_mul_int( + spl_fixpt_two_pi, + (int)spl_div64_s64( + arg_norm.value, + spl_fixpt_two_pi.value))); + } + + square = spl_fixpt_sqr(arg_norm); + + do { + res = spl_fixpt_sub( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + square, + res), + n * (n - 1))); + + n -= 2; + } while (n > 2); + + if (arg.value != arg_norm.value) + res = spl_fixpt_div( + spl_fixpt_mul(res, arg_norm), + arg); + + return res; +} + +struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg) +{ + return spl_fixpt_mul( + arg, + spl_fixpt_sinc(arg)); +} + +struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg) +{ + /* TODO implement argument normalization */ + + const struct spl_fixed31_32 square = spl_fixpt_sqr(arg); + + struct spl_fixed31_32 res = spl_fixpt_one; + + int n = 26; + + do { + res = spl_fixpt_sub( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + square, + res), + n * (n - 1))); + + n -= 2; + } while (n != 0); + + return res; +} + +/* + * @brief + * result = exp(arg), + * where abs(arg) < 1 + * + * Calculated as Taylor series. + */ +static struct spl_fixed31_32 fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg) +{ + unsigned int n = 9; + + struct spl_fixed31_32 res = spl_fixpt_from_fraction( + n + 2, + n + 1); + /* TODO find correct res */ + + ASSERT(spl_fixpt_lt(arg, spl_fixpt_one)); + + do + res = spl_fixpt_add( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + arg, + res), + n)); + while (--n != 1); + + return spl_fixpt_add( + spl_fixpt_one, + spl_fixpt_mul( + arg, + res)); +} + +struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg) +{ + /* + * @brief + * Main equation is: + * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), + * where m = round(x / ln(2)), r = x - m * ln(2) + */ + + if (spl_fixpt_le( + spl_fixpt_ln2_div_2, + spl_fixpt_abs(arg))) { + int m = spl_fixpt_round( + spl_fixpt_div( + arg, + spl_fixpt_ln2)); + + struct spl_fixed31_32 r = spl_fixpt_sub( + arg, + spl_fixpt_mul_int( + spl_fixpt_ln2, + m)); + + ASSERT(m != 0); + + ASSERT(spl_fixpt_lt( + spl_fixpt_abs(r), + spl_fixpt_one)); + + if (m > 0) + return spl_fixpt_shl( + fixed31_32_exp_from_taylor_series(r), + (unsigned char)m); + else + return spl_fixpt_div_int( + fixed31_32_exp_from_taylor_series(r), + 1LL << -m); + } else if (arg.value != 0) + return fixed31_32_exp_from_taylor_series(arg); + else + return spl_fixpt_one; +} + +struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res = spl_fixpt_neg(spl_fixpt_one); + /* TODO improve 1st estimation */ + + struct spl_fixed31_32 error; + + ASSERT(arg.value > 0); + /* TODO if arg is negative, return NaN */ + /* TODO if arg is zero, return -INF */ + + do { + struct spl_fixed31_32 res1 = spl_fixpt_add( + spl_fixpt_sub( + res, + spl_fixpt_one), + spl_fixpt_div( + arg, + spl_fixpt_exp(res))); + + error = spl_fixpt_sub( + res, + res1); + + res = res1; + /* TODO determine max_allowed_error based on quality of exp() */ + } while (abs_i64(error.value) > 100ULL); + + return res; +} + + +/* this function is a generic helper to translate fixed point value to + * specified integer format that will consist of integer_bits integer part and + * fractional_bits fractional part. For example it is used in + * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional + * part in 32 bits. It is used in hw programming (scaler) + */ + +static inline unsigned int ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + /* 1. create mask of integer part */ + unsigned int result = (1 << integer_bits) - 1; + /* 2. mask out fractional part */ + unsigned int fractional_part = FRACTIONAL_PART_MASK & value; + /* 3. shrink fixed point integer part to be of integer_bits width*/ + result &= GET_INTEGER_PART(value); + /* 4. make space for fractional part to be filled in after integer */ + result <<= fractional_bits; + /* 5. shrink fixed point fractional part to of fractional_bits width*/ + fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; + /* 6. merge the result */ + return result | fractional_part; +} + +static inline unsigned int clamp_ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits, + unsigned int min_clamp) +{ + unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); + + if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) + return (1 << (integer_bits + fractional_bits)) - 1; + else if (truncated_val > min_clamp) + return truncated_val; + else + return min_clamp; +} + +unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 4, 19); +} + +unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 3, 19); +} + +unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 2, 19); +} + +unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 0, 19); +} + +unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg) +{ + return clamp_ux_dy(arg.value, 0, 14, 1); +} + +unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg) +{ + return clamp_ux_dy(arg.value, 0, 10, 1); +} + +int spl_fixpt_s4d19(struct spl_fixed31_32 arg) +{ + if (arg.value < 0) + return -(int)ux_dy(spl_fixpt_abs(arg).value, 4, 19); + else + return ux_dy(arg.value, 4, 19); +} + +struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct spl_fixed31_32 fixpt_value = spl_fixpt_zero; + struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero; + long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; + + fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + frac_mask = frac_mask << fractional_bits; + fixpt_int_value.value = value & frac_mask; + fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + fixpt_value.value |= fixpt_int_value.value; + return fixpt_value; +} + +struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(int_value); + + fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + return fixpt_value; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h new file mode 100644 index 000000000000..8a045e2f8699 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h @@ -0,0 +1,525 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ + +#ifndef __SPL_FIXED31_32_H__ +#define __SPL_FIXED31_32_H__ + +#include "os_types.h" +#include "spl_os_types.h" // swap +#ifndef ASSERT +#define ASSERT(_bool) ((void *)0) +#endif + +#ifndef LLONG_MAX +#define LLONG_MAX 9223372036854775807ll +#endif +#ifndef LLONG_MIN +#define LLONG_MIN (-LLONG_MAX - 1ll) +#endif + +#define FIXED31_32_BITS_PER_FRACTIONAL_PART 32 +#ifndef LLONG_MIN +#define LLONG_MIN (1LL<<63) +#endif +#ifndef LLONG_MAX +#define LLONG_MAX (-1LL>>1) +#endif + +/* + * @brief + * Arithmetic operations on real numbers + * represented as fixed-point numbers. + * There are: 1 bit for sign, + * 31 bit for integer part, + * 32 bits for fractional part. + * + * @note + * Currently, overflows and underflows are asserted; + * no special result returned. + */ + +struct spl_fixed31_32 { + long long value; +}; + + +/* + * @brief + * Useful constants + */ + +static const struct spl_fixed31_32 spl_fixpt_zero = { 0 }; +static const struct spl_fixed31_32 spl_fixpt_epsilon = { 1LL }; +static const struct spl_fixed31_32 spl_fixpt_half = { 0x80000000LL }; +static const struct spl_fixed31_32 spl_fixpt_one = { 0x100000000LL }; + +/* + * @brief + * Initialization routines + */ + +/* + * @brief + * result = numerator / denominator + */ +struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator); + +/* + * @brief + * result = arg + */ +static inline struct spl_fixed31_32 spl_fixpt_from_int(int arg) +{ + struct spl_fixed31_32 res; + + res.value = (long long) arg << FIXED31_32_BITS_PER_FRACTIONAL_PART; + + return res; +} + +/* + * @brief + * Unary operators + */ + +/* + * @brief + * result = -arg + */ +static inline struct spl_fixed31_32 spl_fixpt_neg(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res; + + res.value = -arg.value; + + return res; +} + +/* + * @brief + * result = abs(arg) := (arg >= 0) ? arg : -arg + */ +static inline struct spl_fixed31_32 spl_fixpt_abs(struct spl_fixed31_32 arg) +{ + if (arg.value < 0) + return spl_fixpt_neg(arg); + else + return arg; +} + +/* + * @brief + * Binary relational operators + */ + +/* + * @brief + * result = arg1 < arg2 + */ +static inline bool spl_fixpt_lt(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value < arg2.value; +} + +/* + * @brief + * result = arg1 <= arg2 + */ +static inline bool spl_fixpt_le(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value <= arg2.value; +} + +/* + * @brief + * result = arg1 == arg2 + */ +static inline bool spl_fixpt_eq(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value == arg2.value; +} + +/* + * @brief + * result = min(arg1, arg2) := (arg1 <= arg2) ? arg1 : arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_min(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value <= arg2.value) + return arg1; + else + return arg2; +} + +/* + * @brief + * result = max(arg1, arg2) := (arg1 <= arg2) ? arg2 : arg1 + */ +static inline struct spl_fixed31_32 spl_fixpt_max(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value <= arg2.value) + return arg2; + else + return arg1; +} + +/* + * @brief + * | min_value, when arg <= min_value + * result = | arg, when min_value < arg < max_value + * | max_value, when arg >= max_value + */ +static inline struct spl_fixed31_32 spl_fixpt_clamp( + struct spl_fixed31_32 arg, + struct spl_fixed31_32 min_value, + struct spl_fixed31_32 max_value) +{ + if (spl_fixpt_le(arg, min_value)) + return min_value; + else if (spl_fixpt_le(max_value, arg)) + return max_value; + else + return arg; +} + +/* + * @brief + * Binary shift operators + */ + +/* + * @brief + * result = arg << shift + */ +static inline struct spl_fixed31_32 spl_fixpt_shl(struct spl_fixed31_32 arg, unsigned char shift) +{ + ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) || + ((arg.value < 0) && (arg.value >= ~(LLONG_MAX >> shift)))); + + arg.value = arg.value << shift; + + return arg; +} + +/* + * @brief + * result = arg >> shift + */ +static inline struct spl_fixed31_32 spl_fixpt_shr(struct spl_fixed31_32 arg, unsigned char shift) +{ + bool negative = arg.value < 0; + + if (negative) + arg.value = -arg.value; + arg.value = arg.value >> shift; + if (negative) + arg.value = -arg.value; + return arg; +} + +/* + * @brief + * Binary additive operators + */ + +/* + * @brief + * result = arg1 + arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_add(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) || + ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value))); + + res.value = arg1.value + arg2.value; + + return res; +} + +/* + * @brief + * result = arg1 + arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_add_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_add(arg1, spl_fixpt_from_int(arg2)); +} + +/* + * @brief + * result = arg1 - arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_sub(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) || + ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value))); + + res.value = arg1.value - arg2.value; + + return res; +} + +/* + * @brief + * result = arg1 - arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_sub_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_sub(arg1, spl_fixpt_from_int(arg2)); +} + + +/* + * @brief + * Binary multiplicative operators + */ + +/* + * @brief + * result = arg1 * arg2 + */ +struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2); + + +/* + * @brief + * result = arg1 * arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_mul_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_mul(arg1, spl_fixpt_from_int(arg2)); +} + +/* + * @brief + * result = square(arg) := arg * arg + */ +struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg); + +/* + * @brief + * result = arg1 / arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_div_int(struct spl_fixed31_32 arg1, long long arg2) +{ + return spl_fixpt_from_fraction(arg1.value, spl_fixpt_from_int((int)arg2).value); +} + +/* + * @brief + * result = arg1 / arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_div(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return spl_fixpt_from_fraction(arg1.value, arg2.value); +} + +/* + * @brief + * Reciprocal function + */ + +/* + * @brief + * result = reciprocal(arg) := 1 / arg + * + * @note + * No special actions taken in case argument is zero. + */ +struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg); + +/* + * @brief + * Trigonometric functions + */ + +/* + * @brief + * result = sinc(arg) := sin(arg) / arg + * + * @note + * Argument specified in radians, + * internally it's normalized to [-2pi...2pi] range. + */ +struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg); + +/* + * @brief + * result = sin(arg) + * + * @note + * Argument specified in radians, + * internally it's normalized to [-2pi...2pi] range. + */ +struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg); + +/* + * @brief + * result = cos(arg) + * + * @note + * Argument specified in radians + * and should be in [-2pi...2pi] range - + * passing arguments outside that range + * will cause incorrect result! + */ +struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg); + +/* + * @brief + * Transcendent functions + */ + +/* + * @brief + * result = exp(arg) + * + * @note + * Currently, function is verified for abs(arg) <= 1. + */ +struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg); + +/* + * @brief + * result = log(arg) + * + * @note + * Currently, abs(arg) should be less than 1. + * No normalization is done. + * Currently, no special actions taken + * in case of invalid argument(s). Take care! + */ +struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg); + +/* + * @brief + * Power function + */ + +/* + * @brief + * result = pow(arg1, arg2) + * + * @note + * Currently, abs(arg1) should be less than 1. Take care! + */ +static inline struct spl_fixed31_32 spl_fixpt_pow(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value == 0) + return arg2.value == 0 ? spl_fixpt_one : spl_fixpt_zero; + + return spl_fixpt_exp( + spl_fixpt_mul( + spl_fixpt_log(arg1), + arg2)); +} + +/* + * @brief + * Rounding functions + */ + +/* + * @brief + * result = floor(arg) := greatest integer lower than or equal to arg + */ +static inline int spl_fixpt_floor(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} + +/* + * @brief + * result = round(arg) := integer nearest to arg + */ +static inline int spl_fixpt_round(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + const long long summand = spl_fixpt_half.value; + + ASSERT(LLONG_MAX - (long long)arg_value >= summand); + + arg_value += summand; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} + +/* + * @brief + * result = ceil(arg) := lowest integer greater than or equal to arg + */ +static inline int spl_fixpt_ceil(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + const long long summand = spl_fixpt_one.value - + spl_fixpt_epsilon.value; + + ASSERT(LLONG_MAX - (long long)arg_value >= summand); + + arg_value += summand; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} + +/* the following two function are used in scaler hw programming to convert fixed + * point value to format 2 bits from integer part and 19 bits from fractional + * part. The same applies for u0d19, 0 bits from integer part and 19 bits from + * fractional + */ + +unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg); + +unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg); + +int spl_fixpt_s4d19(struct spl_fixed31_32 arg); + +static inline struct spl_fixed31_32 spl_fixpt_truncate(struct spl_fixed31_32 arg, unsigned int frac_bits) +{ + bool negative = arg.value < 0; + + if (frac_bits >= FIXED31_32_BITS_PER_FRACTIONAL_PART) { + ASSERT(frac_bits == FIXED31_32_BITS_PER_FRACTIONAL_PART); + return arg; + } + + if (negative) + arg.value = -arg.value; + arg.value &= (~0ULL) << (FIXED31_32_BITS_PER_FRACTIONAL_PART - frac_bits); + if (negative) + arg.value = -arg.value; + return arg; +} + +struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits); +struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h index 7ebea91c84f6..709706ed4f2c 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h @@ -1,28 +1,7 @@ -/* - * Copyright 2012-16 Advanced Micro Devices, Inc. - * Copyright 2019 Raptor Engineering, LLC - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ +/* Copyright 2019 Raptor Engineering, LLC */ #ifndef _SPL_OS_TYPES_H_ #define _SPL_OS_TYPES_H_ @@ -39,7 +18,6 @@ * general debug capabilities * */ -// TODO: need backport #define SPL_BREAK_TO_DEBUGGER() ASSERT(0) static inline uint64_t spl_div_u64_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index c5f99cbff0b6..e20c220aa8b4 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -111,7 +111,7 @@ #define DMUB_MAX_PHANTOM_PLANES ((DMUB_MAX_PLANES) / 2) /* Trace buffer offset for entry */ -#define TRACE_BUFFER_ENTRY_OFFSET 16 +#define TRACE_BUFFER_ENTRY_OFFSET 16 /** * Maximum number of dirty rects supported by FW. @@ -1879,7 +1879,12 @@ enum dmub_cmd_idle_opt_type { /** * DCN hardware notify idle. */ - DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE = 2 + DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE = 2, + + /** + * DCN hardware notify power state. + */ + DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE = 3, }; /** @@ -1907,6 +1912,33 @@ struct dmub_rb_cmd_idle_opt_dcn_notify_idle { }; /** + * enum dmub_idle_opt_dc_power_state - DC power states. + */ +enum dmub_idle_opt_dc_power_state { + DMUB_IDLE_OPT_DC_POWER_STATE_UNKNOWN = 0, + DMUB_IDLE_OPT_DC_POWER_STATE_D0 = 1, + DMUB_IDLE_OPT_DC_POWER_STATE_D1 = 2, + DMUB_IDLE_OPT_DC_POWER_STATE_D2 = 4, + DMUB_IDLE_OPT_DC_POWER_STATE_D3 = 8, +}; + +/** + * struct dmub_idle_opt_set_dc_power_state_data - Data passed to FW in a DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE command. + */ +struct dmub_idle_opt_set_dc_power_state_data { + uint8_t power_state; /**< power state */ + uint8_t pad[3]; /**< padding */ +}; + +/** + * struct dmub_rb_cmd_idle_opt_set_dc_power_state - Data passed to FW in a DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE command. + */ +struct dmub_rb_cmd_idle_opt_set_dc_power_state { + struct dmub_cmd_header header; /**< header */ + struct dmub_idle_opt_set_dc_power_state_data data; +}; + +/** * struct dmub_clocks - Clock update notification. */ struct dmub_clocks { @@ -5298,6 +5330,10 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE command. */ struct dmub_rb_cmd_idle_opt_dcn_notify_idle idle_opt_notify_idle; + /** + * Definition of a DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE command. + */ + struct dmub_rb_cmd_idle_opt_set_dc_power_state idle_opt_set_dc_power_state; /* * Definition of a DMUB_CMD__REPLAY_COPY_SETTINGS command. */ diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index f5b725f10a7c..745fd052840d 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -61,7 +61,7 @@ enum amd_apu_flags { * acquires the list of IP blocks for the GPU in use on initialization. * It can then operate on this list to perform standard driver operations * such as: init, fini, suspend, resume, etc. -* +* * * IP block implementations are named using the following convention: * <functionality>_v<version> (E.g.: gfx_v6_0). @@ -251,19 +251,92 @@ enum DC_FEATURE_MASK { DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4 }; +/** + * enum DC_DEBUG_MASK - Bits that are useful for debugging the Display Core IP + */ enum DC_DEBUG_MASK { + /** + * @DC_DISABLE_PIPE_SPLIT: If set, disable pipe-splitting + */ DC_DISABLE_PIPE_SPLIT = 0x1, + + /** + * @DC_DISABLE_STUTTER: If set, disable memory stutter mode + */ DC_DISABLE_STUTTER = 0x2, + + /** + * @DC_DISABLE_DSC: If set, disable display stream compression + */ DC_DISABLE_DSC = 0x4, + + /** + * @DC_DISABLE_CLOCK_GATING: If set, disable clock gating optimizations + */ DC_DISABLE_CLOCK_GATING = 0x8, + + /** + * @DC_DISABLE_PSR: If set, disable Panel self refresh v1 and PSR-SU + */ DC_DISABLE_PSR = 0x10, + + /** + * @DC_FORCE_SUBVP_MCLK_SWITCH: If set, force mclk switch in subvp, even + * if mclk switch in vblank is possible + */ DC_FORCE_SUBVP_MCLK_SWITCH = 0x20, + + /** + * @DC_DISABLE_MPO: If set, disable multi-plane offloading + */ DC_DISABLE_MPO = 0x40, + + /** + * @DC_ENABLE_DPIA_TRACE: If set, enable trace logging for DPIA + */ DC_ENABLE_DPIA_TRACE = 0x80, + + /** + * @DC_ENABLE_DML2: If set, force usage of DML2, even if the DCN version + * does not default to it. + */ DC_ENABLE_DML2 = 0x100, + + /** + * @DC_DISABLE_PSR_SU: If set, disable PSR SU + */ DC_DISABLE_PSR_SU = 0x200, + + /** + * @DC_DISABLE_REPLAY: If set, disable Panel Replay + */ DC_DISABLE_REPLAY = 0x400, + + /** + * @DC_DISABLE_IPS: If set, disable all Idle Power States, all the time. + * If more than one IPS debug bit is set, the lowest bit takes + * precedence. For example, if DC_FORCE_IPS_ENABLE and + * DC_DISABLE_IPS_DYNAMIC are set, then DC_DISABLE_IPS_DYNAMIC takes + * precedence. + */ DC_DISABLE_IPS = 0x800, + + /** + * @DC_DISABLE_IPS_DYNAMIC: If set, disable all IPS, all the time, + * *except* when driver goes into suspend. + */ + DC_DISABLE_IPS_DYNAMIC = 0x1000, + + /** + * @DC_DISABLE_IPS2_DYNAMIC: If set, disable IPS2 (IPS1 allowed) if + * there is an enabled display. Otherwise, enable all IPS. + */ + DC_DISABLE_IPS2_DYNAMIC = 0x2000, + + /** + * @DC_FORCE_IPS_ENABLE: If set, force enable all IPS, all the time. + */ + DC_FORCE_IPS_ENABLE = 0x4000, }; enum amd_dpm_forced_level; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 9d7454b3c314..bb3bc68dfc39 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1257,7 +1257,6 @@ static int smu_sw_init(void *handle) atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); - smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; @@ -1265,6 +1264,7 @@ static int smu_sw_init(void *handle) smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4; smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; + smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; @@ -2224,8 +2224,9 @@ static int smu_bump_power_profile_mode(struct smu_context *smu, } static int smu_adjust_power_state_dynamic(struct smu_context *smu, - enum amd_dpm_forced_level level, - bool skip_display_settings) + enum amd_dpm_forced_level level, + bool skip_display_settings, + bool force_update) { int ret = 0; int index = 0; @@ -2254,7 +2255,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, } } - if (smu_dpm_ctx->dpm_level != level) { + if (force_update || smu_dpm_ctx->dpm_level != level) { ret = smu_asic_set_performance_level(smu, level); if (ret) { dev_err(smu->adev->dev, "Failed to set performance level!"); @@ -2271,7 +2272,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; - if (smu->power_profile_mode != workload[0]) + if (force_update || smu->power_profile_mode != workload[0]) smu_bump_power_profile_mode(smu, workload, 0); } @@ -2292,11 +2293,13 @@ static int smu_handle_task(struct smu_context *smu, ret = smu_pre_display_config_changed(smu); if (ret) return ret; - ret = smu_adjust_power_state_dynamic(smu, level, false); + ret = smu_adjust_power_state_dynamic(smu, level, false, false); break; case AMD_PP_TASK_COMPLETE_INIT: + ret = smu_adjust_power_state_dynamic(smu, level, true, true); + break; case AMD_PP_TASK_READJUST_POWER_STATE: - ret = smu_adjust_power_state_dynamic(smu, level, true); + ret = smu_adjust_power_state_dynamic(smu, level, true, false); break; default: break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index a7d0231727e8..7bc95c404377 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2378,7 +2378,7 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf size += sysfs_emit_at(buf, size, " "); for (i = 0; i <= PP_SMC_POWER_PROFILE_WINDOW3D; i++) - size += sysfs_emit_at(buf, size, "%-14s%s", amdgpu_pp_profile_name[i], + size += sysfs_emit_at(buf, size, "%d %-14s%s", i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? "* " : " "); size += sysfs_emit_at(buf, size, "\n"); @@ -2408,7 +2408,7 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf do { \ size += sysfs_emit_at(buf, size, "%-30s", #field); \ for (j = 0; j <= PP_SMC_POWER_PROFILE_WINDOW3D; j++) \ - size += sysfs_emit_at(buf, size, "%-16d", activity_monitor_external[j].DpmActivityMonitorCoeffInt.field); \ + size += sysfs_emit_at(buf, size, "%-18d", activity_monitor_external[j].DpmActivityMonitorCoeffInt.field); \ size += sysfs_emit_at(buf, size, "\n"); \ } while (0) |