summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c87
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c1043
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c584
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c247
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c195
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c120
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c365
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_test.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c496
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c292
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c273
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ci_dpm.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_virtual.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.c120
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.h (renamed from drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h)38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c116
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.h40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c102
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c344
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c146
-rw-r--r--drivers/gpu/drm/amd/amdgpu/kv_dpm.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h67
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c96
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dpm.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c125
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15d.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c112
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c1073
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c188
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c130
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c20
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_regs.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h560
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm274
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm1214
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c52
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c131
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c114
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c84
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c65
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c119
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c92
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c39
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c340
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c319
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c443
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c392
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h583
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h112
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c50
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c22
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_queue.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/soc15_int.h47
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig8
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c225
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h5
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c15
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c5
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.c28
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c376
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/fixpt32_32.c161
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/log_helpers.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/logger.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table.c22
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table2.c26
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h579
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/custom_float.c46
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c256
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c82
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c112
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_debug.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c24
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c170
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c345
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c24
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_surface.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dp_types.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_helper.c59
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_hw_types.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_link.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stream.h28
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_audio.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c73
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c103
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c48
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c76
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_transform.c26
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c200
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c49
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c98
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h76
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c96
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c229
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c215
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h82
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c435
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c1362
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h330
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c24
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h26
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c349
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c1490
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h524
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c138
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h969
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h18
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h17
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h36
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/transform.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/reg_helper.h56
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/resource.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq_types.h9
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_asic_id.h11
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_types.h1
-rw-r--r--drivers/gpu/drm/amd/display/include/fixed31_32.h272
-rw-r--r--drivers/gpu/drm/amd/display/include/fixed32_32.h129
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_interface.h9
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_types.h66
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.c692
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.h48
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_stats.h4
-rw-r--r--drivers/gpu/drm/amd/display/modules/stats/stats.c254
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h23
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h12
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h152
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h19
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h8
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h26
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h37
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h52
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_default.h26
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h33
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h48
-rw-r--r--drivers/gpu/drm/amd/include/atombios.h7
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h35
-rw-r--r--drivers/gpu/drm/amd/include/cgs_common.h170
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h26
-rw-r--r--drivers/gpu/drm/amd/include/kgd_pp_interface.h13
-rw-r--r--drivers/gpu/drm/amd/include/soc15_ih_clientid.h1
-rw-r--r--drivers/gpu/drm/amd/include/v9_structs.h48
-rw-r--r--drivers/gpu/drm/amd/include/vega20_ip_offset.h1051
-rw-r--r--drivers/gpu/drm/amd/powerplay/amd_powerplay.c489
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c41
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c91
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c11
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c222
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h15
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c99
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c39
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c4
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c208
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h3
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c16
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c330
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c229
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c37
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c98
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h27
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c951
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h26
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c121
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c107
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c95
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c7
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c37
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h9
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/hwmgr.h35
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h4
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu75.h760
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h886
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/Makefile2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c12
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c24
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c9
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c25
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c39
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c46
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c19
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c52
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c56
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c2383
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h75
318 files changed, 26852 insertions, 7830 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2ca2b5154d52..bfd332c95b61 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -56,13 +56,18 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
- ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
- amdgpu_amdkfd_gfx_v7.o
+ ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
amdgpu-y += \
- vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
+ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
+ vega20_reg_init.o
+
+# add DF block
+amdgpu-y += \
+ df_v1_7.o \
+ df_v3_6.o
# add GMC block
amdgpu-y += \
@@ -126,11 +131,20 @@ amdgpu-y += \
vcn_v1_0.o
# add amdkfd interfaces
+amdgpu-y += amdgpu_amdkfd.o
+
+ifneq ($(CONFIG_HSA_AMD),)
amdgpu-y += \
- amdgpu_amdkfd.o \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
- amdgpu_amdkfd_gfx_v8.o
+ amdgpu_amdkfd_gfx_v8.o \
+ amdgpu_amdkfd_gfx_v9.o
+
+ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
+amdgpu-y += amdgpu_amdkfd_gfx_v7.o
+endif
+
+endif
# add cgs
amdgpu-y += amdgpu_cgs.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c8b605f3dc05..a59c07590cee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -129,6 +129,7 @@ extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
extern int amdgpu_emu_mode;
+extern uint amdgpu_smu_memory_pool_size;
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
@@ -137,6 +138,7 @@ extern int amdgpu_si_support;
extern int amdgpu_cik_support;
#endif
+#define AMDGPU_SG_THRESHOLD (256*1024*1024)
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
@@ -222,10 +224,10 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_LAST
};
-int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_clockgating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state);
-int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_powergating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_powergating_state state);
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
@@ -681,6 +683,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
@@ -771,9 +775,18 @@ struct amdgpu_rlc {
u32 starting_offsets_start;
u32 reg_list_format_size_bytes;
u32 reg_list_size_bytes;
+ u32 reg_list_format_direct_reg_list_length;
+ u32 save_restore_list_cntl_size_bytes;
+ u32 save_restore_list_gpm_size_bytes;
+ u32 save_restore_list_srm_size_bytes;
u32 *register_list_format;
u32 *register_restore;
+ u8 *save_restore_list_cntl;
+ u8 *save_restore_list_gpm;
+ u8 *save_restore_list_srm;
+
+ bool is_rlc_v2_1;
};
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@@ -867,6 +880,8 @@ struct amdgpu_gfx_config {
/* gfx configure feature */
uint32_t double_offchip_lds_buf;
+ /* cached value of DB_DEBUG2 */
+ uint32_t db_debug2;
};
struct amdgpu_cu_info {
@@ -938,6 +953,12 @@ struct amdgpu_gfx {
uint32_t ce_feature_version;
uint32_t pfp_feature_version;
uint32_t rlc_feature_version;
+ uint32_t rlc_srlc_fw_version;
+ uint32_t rlc_srlc_feature_version;
+ uint32_t rlc_srlg_fw_version;
+ uint32_t rlc_srlg_feature_version;
+ uint32_t rlc_srls_fw_version;
+ uint32_t rlc_srls_feature_version;
uint32_t mec_feature_version;
uint32_t mec2_feature_version;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
@@ -1204,6 +1225,8 @@ struct amdgpu_asic_funcs {
/* invalidate hdp read cache */
void (*invalidate_hdp)(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
+ /* check if the asic needs a full reset of if soft reset will work */
+ bool (*need_full_reset)(struct amdgpu_device *adev);
};
/*
@@ -1368,7 +1391,19 @@ struct amdgpu_nbio_funcs {
void (*detect_hw_virt)(struct amdgpu_device *adev);
};
-
+struct amdgpu_df_funcs {
+ void (*init)(struct amdgpu_device *adev);
+ void (*enable_broadcast_mode)(struct amdgpu_device *adev,
+ bool enable);
+ u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
+ u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
+ void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+ bool enable);
+ void (*get_clockgating_state)(struct amdgpu_device *adev,
+ u32 *flags);
+ void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
+ bool enable);
+};
/* Define the HW IP blocks will be used in driver , add more if necessary */
enum amd_hw_ip_block_type {
GC_HWIP = 1,
@@ -1398,6 +1433,7 @@ enum amd_hw_ip_block_type {
struct amd_powerplay {
void *pp_handle;
const struct amd_pm_funcs *pp_funcs;
+ uint32_t pp_feature;
};
#define AMDGPU_RESET_MAGIC_NUM 64
@@ -1590,6 +1626,7 @@ struct amdgpu_device {
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
const struct amdgpu_nbio_funcs *nbio_funcs;
+ const struct amdgpu_df_funcs *df_funcs;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work late_init_work;
@@ -1764,6 +1801,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
+#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
@@ -1790,6 +1828,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
+#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index a29362f9ef41..428e5eb3444f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -290,12 +290,11 @@ static int acp_hw_init(void *handle)
else if (r)
return r;
- r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO,
- 0x5289, 0, &acp_base);
- if (r == -ENODEV)
- return 0;
- else if (r)
- return r;
+ if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
+ return -EINVAL;
+
+ acp_base = adev->rmmio_base;
+
if (adev->asic_type != CHIP_STONEY) {
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
if (adev->acp.acp_genpd == NULL)
@@ -513,7 +512,7 @@ static int acp_hw_fini(void *handle)
if (adev->acp.acp_genpd) {
for (i = 0; i < ACP_DEVS ; i++) {
dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
- ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev);
+ ret = pm_genpd_remove_device(dev);
/* If removal fails, dont giveup and try rest */
if (ret)
dev_err(dev, "remove dev from genpd failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 4d36203ffb11..8f6f45567bfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -50,15 +50,21 @@ int amdgpu_amdkfd_init(void)
kgd2kfd = NULL;
}
+
#elif defined(CONFIG_HSA_AMD)
+
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret)
kgd2kfd = NULL;
#else
+ kgd2kfd = NULL;
ret = -ENOENT;
#endif
+
+#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
amdgpu_amdkfd_gpuvm_init_mem_limits();
+#endif
return ret;
}
@@ -92,8 +98,12 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_POLARIS11:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
+ break;
default:
- dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
+ dev_info(adev->dev, "kfd not supported on this ASIC\n");
return;
}
@@ -175,6 +185,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
+ if (adev->asic_type >= CHIP_VEGA10) {
+ /* On SOC15 the BIF is involved in routing
+ * doorbells using the low 12 bits of the
+ * address. Communicate the assignments to
+ * KFD. KFD uses two doorbell pages per
+ * process in case of 64-bit doorbells so we
+ * can use each doorbell assignment twice.
+ */
+ gpu_resources.sdma_doorbell[0][0] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE0;
+ gpu_resources.sdma_doorbell[0][1] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
+ gpu_resources.sdma_doorbell[1][0] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE1;
+ gpu_resources.sdma_doorbell[1][1] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
+ /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
+ * SDMA, IH and VCN. So don't use them for the CP.
+ */
+ gpu_resources.reserved_doorbell_mask = 0x1f0;
+ gpu_resources.reserved_doorbell_val = 0x0f0;
+ }
kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
@@ -217,13 +249,19 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo_param bp;
int r;
uint64_t gpu_addr_tmp = 0;
void *cpu_ptr_tmp = NULL;
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel,
- NULL, &bo);
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ r = amdgpu_bo_create(adev, &bp, &bo);
if (r) {
dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
@@ -432,3 +470,44 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
+
+#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD)
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+ return false;
+}
+
+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
+{
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+ return NULL;
+}
+
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
+{
+ return 0;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
+{
+ return NULL;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
+{
+ return NULL;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
+{
+ return NULL;
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index c2c2bea731e0..a8418a3f4e9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
+#include <linux/workqueue.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
@@ -59,7 +60,9 @@ struct kgd_mem {
uint32_t mapping_flags;
+ atomic_t invalid;
struct amdkfd_process_info *process_info;
+ struct page **user_pages;
struct amdgpu_sync sync;
@@ -84,6 +87,9 @@ struct amdkfd_process_info {
struct list_head vm_list_head;
/* List head for all KFD BOs that belong to a KFD process. */
struct list_head kfd_bo_list;
+ /* List of userptr BOs that are valid or invalid */
+ struct list_head userptr_valid_list;
+ struct list_head userptr_inval_list;
/* Lock to protect kfd_bo_list */
struct mutex lock;
@@ -91,6 +97,11 @@ struct amdkfd_process_info {
unsigned int n_vms;
/* Eviction Fence */
struct amdgpu_amdkfd_fence *eviction_fence;
+
+ /* MMU-notifier related fields */
+ atomic_t evicted_bos;
+ struct delayed_work restore_userptr_work;
+ struct pid *pid;
};
int amdgpu_amdkfd_init(void);
@@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
@@ -143,14 +156,14 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
/* GPUVM API */
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
- void **process_info,
- struct dma_fence **ef);
+ void **process_info,
+ struct dma_fence **ef);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
- struct file *filp,
- void **vm, void **process_info,
- struct dma_fence **ef);
+ struct file *filp,
+ void **vm, void **process_info,
+ struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
+ struct amdgpu_vm *vm);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index ea54e53172b9..0ff36d45a597 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
@@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
- /* amdgpu owns the per-pipe state */
- return 0;
-}
-
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 89264c9a5e9f..6ef9762b4b00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
@@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
- /* amdgpu owns the per-pipe state */
- return 0;
-}
-
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
new file mode 100644
index 000000000000..f0c0d3953f69
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -0,0 +1,1043 @@
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "kfd2kgd: " fmt
+
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ucode.h"
+#include "soc15_hw_ip.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "vega10_enum.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "sdma1/sdma1_4_0_offset.h"
+#include "sdma1/sdma1_4_0_sh_mask.h"
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+/* HACK: MMHUB and GC both have VM-related register with the same
+ * names but different offsets. Define the MMHUB register we need here
+ * with a prefix. A proper solution would be to move the functions
+ * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
+ * respectively.
+ */
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
+
+#define V9_PIPE_PER_MEC (4)
+#define V9_QUEUES_PER_PIPE_MEC (8)
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES
+};
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases);
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid);
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id);
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id);
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base);
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+ struct tile_config *config)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
+
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+ config->num_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ config->macro_tile_config_ptr =
+ adev->gfx.config.macrotile_mode_array;
+ config->num_macro_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+ return 0;
+}
+
+static const struct kfd2kgd_calls kfd2kgd = {
+ .init_gtt_mem_allocation = alloc_gtt_mem,
+ .free_gtt_mem = free_gtt_mem,
+ .get_local_mem_info = get_local_mem_info,
+ .get_gpu_clock_counter = get_gpu_clock_counter,
+ .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+ .alloc_pasid = amdgpu_pasid_alloc,
+ .free_pasid = amdgpu_pasid_free,
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_pasid =
+ get_atc_vmid_pasid_mapping_pasid,
+ .get_atc_vmid_pasid_mapping_valid =
+ get_atc_vmid_pasid_mapping_valid,
+ .get_fw_version = get_fw_version,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .get_tile_config = amdgpu_amdkfd_get_tile_config,
+ .get_cu_info = get_cu_info,
+ .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+ .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+ .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
+ .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+ .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+ .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+ .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+ .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+ .invalidate_tlbs = invalidate_tlbs,
+ .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+ .submit_ib = amdgpu_amdkfd_submit_ib,
+};
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
+{
+ return (struct kfd2kgd_calls *)&kfd2kgd;
+}
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+ return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ soc15_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static uint32_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id) & 31;
+
+ return ((uint32_t)1) << bit;
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+ unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ lock_srbm(kgd, 0, 0, 0, vmid);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ /* APE1 no longer exists on GFX9 */
+
+ unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ /*
+ * need to do this twice, once for gfx and once for mmhub
+ * for ATC add 16 to VMID for mmhub, for IH different registers.
+ * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
+ */
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid)))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << (vmid + 16))))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << (vmid + 16));
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
+ pasid_mapping);
+ return 0;
+}
+
+/* TODO - RING0 form of field is obsolete, seems to date back to SI
+ * but still works
+ */
+
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, 0, 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(kgd);
+
+ return 0;
+}
+
+static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t base[2] = {
+ SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
+ SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
+ };
+ uint32_t retval;
+
+ retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
+ mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("sdma base address: 0x%x\n", retval);
+
+ return retval;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uintptr_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uintptr_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+ get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+
+ release_queue(kgd);
+
+ return 0;
+}
+
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(kgd);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+ sdmax_gfx_context_cntl = m->sdma_engine_id ?
+ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
+ SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+ data = RREG32(sdmax_gfx_context_cntl);
+ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+ RESUME_CTX, 0);
+ WREG32(sdmax_gfx_context_cntl, data);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
+ high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+ retval = true;
+ }
+ release_queue(kgd);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v9_mqd *m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
+ release_queue(kgd);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(kgd);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ uint32_t req = (1 << vmid) |
+ (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
+
+ mutex_lock(&adev->srbm_mutex);
+
+ /* Use legacy mode tlb invalidation.
+ *
+ * Currently on Raven the code below is broken for anything but
+ * legacy mode due to a MMHUB power gating problem. A workaround
+ * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
+ * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
+ * bit.
+ *
+ * TODO 1: agree on the right set of invalidation registers for
+ * KFD use. Use the last one for now. Invalidate both GC and
+ * MMHUB.
+ *
+ * TODO 2: support range-based invalidation, requires kfg2kgd
+ * interface change
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+ 0xffffffff);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+ 0x0000001f);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+ 0xffffffff);
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+ 0x0000001f);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
+ req);
+
+ while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
+ (1 << vmid)))
+ cpu_relax();
+
+ while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
+ (1 << vmid)))
+ cpu_relax();
+
+ mutex_unlock(&adev->srbm_mutex);
+
+}
+
+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
+{
+ signed long r;
+ uint32_t seq;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+
+ r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ int vmid;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+ if (ring->ready)
+ return invalidate_tlbs_with_kiq(adev, pasid);
+
+ for (vmid = 0; vmid < 16; vmid++) {
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+ continue;
+ if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
+ if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
+ == pasid) {
+ write_vmid_invalidate_request(kgd, vmid);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("non kfd vmid %d\n", vmid);
+ return 0;
+ }
+
+ write_vmid_invalidate_request(kgd, vmid);
+ return 0;
+}
+
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+ return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo)
+{
+ return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset)
+{
+ return 0;
+}
+
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid)
+{
+ /* No longer needed on GFXv9. The scratch base address is
+ * passed to the shader by the CP. It's the user mode driver's
+ * responsibility.
+ */
+}
+
+/* FIXME: Does this need to be ASIC-specific code? */
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ const union amdgpu_firmware_header *hdr;
+
+ switch (type) {
+ case KGD_ENGINE_PFP:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data;
+ break;
+
+ case KGD_ENGINE_ME:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data;
+ break;
+
+ case KGD_ENGINE_CE:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC1:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC2:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data;
+ break;
+
+ case KGD_ENGINE_RLC:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA1:
+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA2:
+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data;
+ break;
+
+ default:
+ return 0;
+ }
+
+ if (hdr == NULL)
+ return 0;
+
+ /* Only 12 bit in use*/
+ return hdr->common.ucode_version;
+}
+
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
+ AMDGPU_PTE_VALID;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ /* TODO: take advantage of per-process address space size. For
+ * now, all processes share the same address space size, like
+ * on GFX8 and older.
+ */
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1d6e1479da38..ff8fd75f7ca5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -23,6 +23,8 @@
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/list.h>
+#include <linux/pagemap.h>
+#include <linux/sched/mm.h>
#include <drm/drmP.h>
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -33,10 +35,20 @@
*/
#define VI_BO_SIZE_ALIGN (0x8000)
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
+
+/* Userptr restore delay, just long enough to allow consecutive VM
+ * changes to accumulate
+ */
+#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+
/* Impose limit on how much memory KFD can use */
static struct {
uint64_t max_system_mem_limit;
+ uint64_t max_userptr_mem_limit;
int64_t system_mem_used;
+ int64_t userptr_mem_used;
spinlock_t mem_limit_lock;
} kfd_mem_limit;
@@ -57,6 +69,7 @@ static const char * const domain_bit_to_string[] = {
#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -78,6 +91,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
/* Set memory usage limits. Current, limits are
* System (kernel) memory - 3/8th System RAM
+ * Userptr memory - 3/4th System RAM
*/
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
@@ -90,8 +104,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
- pr_debug("Kernel memory limit %lluM\n",
- (kfd_mem_limit.max_system_mem_limit >> 20));
+ kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
+ pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+ (kfd_mem_limit.max_system_mem_limit >> 20),
+ (kfd_mem_limit.max_userptr_mem_limit >> 20));
}
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
@@ -111,6 +127,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
goto err_no_mem;
}
kfd_mem_limit.system_mem_used += (acc_size + size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+ if ((kfd_mem_limit.system_mem_used + acc_size >
+ kfd_mem_limit.max_system_mem_limit) ||
+ (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
+ kfd_mem_limit.max_userptr_mem_limit)) {
+ ret = -ENOMEM;
+ goto err_no_mem;
+ }
+ kfd_mem_limit.system_mem_used += acc_size;
+ kfd_mem_limit.userptr_mem_used += size;
}
err_no_mem:
spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@ -126,10 +152,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
sizeof(struct amdgpu_bo));
spin_lock(&kfd_mem_limit.mem_limit_lock);
- if (domain == AMDGPU_GEM_DOMAIN_GTT)
+ if (domain == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+ kfd_mem_limit.system_mem_used -= acc_size;
+ kfd_mem_limit.userptr_mem_used -= size;
+ }
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+ "kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -138,12 +170,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
{
spin_lock(&kfd_mem_limit.mem_limit_lock);
- if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+ if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+ kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+ kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+ } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -=
(bo->tbo.acc_size + amdgpu_bo_size(bo));
}
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+ "kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -506,7 +543,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev,
}
static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
- struct amdkfd_process_info *process_info)
+ struct amdkfd_process_info *process_info,
+ bool userptr)
{
struct ttm_validate_buffer *entry = &mem->validate_list;
struct amdgpu_bo *bo = mem->bo;
@@ -515,10 +553,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
entry->shared = true;
entry->bo = &bo->tbo;
mutex_lock(&process_info->lock);
- list_add_tail(&entry->head, &process_info->kfd_bo_list);
+ if (userptr)
+ list_add_tail(&entry->head, &process_info->userptr_valid_list);
+ else
+ list_add_tail(&entry->head, &process_info->kfd_bo_list);
mutex_unlock(&process_info->lock);
}
+/* Initializes user pages. It registers the MMU notifier and validates
+ * the userptr BO in the GTT domain.
+ *
+ * The BO must already be on the userptr_valid_list. Otherwise an
+ * eviction and restore may happen that leaves the new BO unmapped
+ * with the user mode queues running.
+ *
+ * Takes the process_info->lock to protect against concurrent restore
+ * workers.
+ *
+ * Returns 0 for success, negative errno for errors.
+ */
+static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
+ uint64_t user_addr)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ struct amdgpu_bo *bo = mem->bo;
+ struct ttm_operation_ctx ctx = { true, false };
+ int ret = 0;
+
+ mutex_lock(&process_info->lock);
+
+ ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
+ if (ret) {
+ pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
+ goto out;
+ }
+
+ ret = amdgpu_mn_register(bo, user_addr);
+ if (ret) {
+ pr_err("%s: Failed to register MMU notifier: %d\n",
+ __func__, ret);
+ goto out;
+ }
+
+ /* If no restore worker is running concurrently, user_pages
+ * should not be allocated
+ */
+ WARN(mem->user_pages, "Leaking user_pages array");
+
+ mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!mem->user_pages) {
+ pr_err("%s: Failed to allocate pages array\n", __func__);
+ ret = -ENOMEM;
+ goto unregister_out;
+ }
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+ if (ret) {
+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+ goto free_out;
+ }
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("%s: Failed to reserve BO\n", __func__);
+ goto release_out;
+ }
+ amdgpu_ttm_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ pr_err("%s: failed to validate BO\n", __func__);
+ amdgpu_bo_unreserve(bo);
+
+release_out:
+ if (ret)
+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+free_out:
+ kvfree(mem->user_pages);
+ mem->user_pages = NULL;
+unregister_out:
+ if (ret)
+ amdgpu_mn_unregister(bo);
+out:
+ mutex_unlock(&process_info->lock);
+ return ret;
+}
+
/* Reserving a BO and its page table BOs must happen atomically to
* avoid deadlocks. Some operations update multiple VMs at once. Track
* all the reservation info in a context structure. Optionally a sync
@@ -748,7 +871,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
- struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
+ struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
+ bool no_update_pte)
{
int ret;
@@ -762,6 +886,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
return ret;
}
+ if (no_update_pte)
+ return 0;
+
ret = update_gpuvm_pte(adev, entry, sync);
if (ret) {
pr_err("update_gpuvm_pte() failed\n");
@@ -820,6 +947,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
mutex_init(&info->lock);
INIT_LIST_HEAD(&info->vm_list_head);
INIT_LIST_HEAD(&info->kfd_bo_list);
+ INIT_LIST_HEAD(&info->userptr_valid_list);
+ INIT_LIST_HEAD(&info->userptr_inval_list);
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
@@ -830,6 +959,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
goto create_evict_fence_fail;
}
+ info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ atomic_set(&info->evicted_bos, 0);
+ INIT_DELAYED_WORK(&info->restore_userptr_work,
+ amdgpu_amdkfd_restore_userptr_worker);
+
*process_info = info;
*ef = dma_fence_get(&info->eviction_fence->base);
}
@@ -872,6 +1006,7 @@ reserve_pd_fail:
dma_fence_put(*ef);
*ef = NULL;
*process_info = NULL;
+ put_pid(info->pid);
create_evict_fence_fail:
mutex_destroy(&info->lock);
kfree(info);
@@ -967,8 +1102,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
/* Release per-process resources when last compute VM is destroyed */
if (!process_info->n_vms) {
WARN_ON(!list_empty(&process_info->kfd_bo_list));
+ WARN_ON(!list_empty(&process_info->userptr_valid_list));
+ WARN_ON(!list_empty(&process_info->userptr_inval_list));
dma_fence_put(&process_info->eviction_fence->base);
+ cancel_delayed_work_sync(&process_info->restore_userptr_work);
+ put_pid(process_info->pid);
mutex_destroy(&process_info->lock);
kfree(process_info);
}
@@ -1003,9 +1142,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ uint64_t user_addr = 0;
struct amdgpu_bo *bo;
+ struct amdgpu_bo_param bp;
int byte_align;
- u32 alloc_domain;
+ u32 domain, alloc_domain;
u64 alloc_flags;
uint32_t mapping_flags;
int ret;
@@ -1014,14 +1155,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
* Check on which domain to allocate BO
*/
if (flags & ALLOC_MEM_FLAGS_VRAM) {
- alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
} else if (flags & ALLOC_MEM_FLAGS_GTT) {
- alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
alloc_flags = 0;
+ if (!offset || !*offset)
+ return -EINVAL;
+ user_addr = *offset;
} else {
return -EINVAL;
}
@@ -1069,8 +1217,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
va, size, domain_string(alloc_domain));
- ret = amdgpu_bo_create(adev, size, byte_align,
- alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo);
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = byte_align;
+ bp.domain = alloc_domain;
+ bp.flags = alloc_flags;
+ bp.type = ttm_bo_type_device;
+ bp.resv = NULL;
+ ret = amdgpu_bo_create(adev, &bp, &bo);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
domain_string(alloc_domain), ret);
@@ -1078,18 +1232,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
}
bo->kfd_bo = *mem;
(*mem)->bo = bo;
+ if (user_addr)
+ bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
(*mem)->va = va;
- (*mem)->domain = alloc_domain;
+ (*mem)->domain = domain;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
- add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
+
+ if (user_addr) {
+ ret = init_user_pages(*mem, current->mm, user_addr);
+ if (ret) {
+ mutex_lock(&avm->process_info->lock);
+ list_del(&(*mem)->validate_list.head);
+ mutex_unlock(&avm->process_info->lock);
+ goto allocate_init_user_pages_failed;
+ }
+ }
if (offset)
*offset = amdgpu_bo_mmap_offset(bo);
return 0;
+allocate_init_user_pages_failed:
+ amdgpu_bo_unref(&bo);
+ /* Don't unreserve system mem limit twice */
+ goto err_reserve_system_mem;
err_bo_create:
unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
@@ -1122,12 +1292,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
* be freed anyway
*/
+ /* No more MMU notifiers */
+ amdgpu_mn_unregister(mem->bo);
+
/* Make sure restore workers don't access the BO any more */
bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
+ /* Free user pages if necessary */
+ if (mem->user_pages) {
+ pr_debug("%s: Freeing user_pages array\n", __func__);
+ if (mem->user_pages[0])
+ release_pages(mem->user_pages,
+ mem->bo->tbo.ttm->num_pages);
+ kvfree(mem->user_pages);
+ }
+
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@@ -1173,21 +1355,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kfd_bo_va_list *bo_va_entry = NULL;
struct kfd_bo_va_list *bo_va_entry_aql = NULL;
unsigned long bo_size;
-
- /* Make sure restore is not running concurrently.
- */
- mutex_lock(&mem->process_info->lock);
-
- mutex_lock(&mem->lock);
+ bool is_invalid_userptr = false;
bo = mem->bo;
-
if (!bo) {
pr_err("Invalid BO when mapping memory to GPU\n");
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
+ /* Make sure restore is not running concurrently. Since we
+ * don't map invalid userptr BOs, we rely on the next restore
+ * worker to do the mapping
+ */
+ mutex_lock(&mem->process_info->lock);
+
+ /* Lock mmap-sem. If we find an invalid userptr BO, we can be
+ * sure that the MMU notifier is no longer running
+ * concurrently and the queues are actually stopped
+ */
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ down_write(&current->mm->mmap_sem);
+ is_invalid_userptr = atomic_read(&mem->invalid);
+ up_write(&current->mm->mmap_sem);
+ }
+
+ mutex_lock(&mem->lock);
+
domain = mem->domain;
bo_size = bo->tbo.mem.size;
@@ -1200,6 +1393,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
if (unlikely(ret))
goto out;
+ /* Userptr can be marked as "not invalid", but not actually be
+ * validated yet (still in the system domain). In that case
+ * the queues are still stopped and we can leave mapping for
+ * the next restore worker
+ */
+ if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+ is_invalid_userptr = true;
+
if (check_if_add_bo_to_vm(avm, mem)) {
ret = add_bo_to_vm(adev, mem, avm, false,
&bo_va_entry);
@@ -1217,7 +1418,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
goto add_bo_to_vm_failed;
}
- if (mem->mapped_to_gpu_memory == 0) {
+ if (mem->mapped_to_gpu_memory == 0 &&
+ !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
/* Validate BO only once. The eviction fence gets added to BO
* the first time it is mapped. Validate will wait for all
* background evictions to complete.
@@ -1235,7 +1437,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
entry->va, entry->va + bo_size,
entry);
- ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
+ ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
+ is_invalid_userptr);
if (ret) {
pr_err("Failed to map radeon bo to gpuvm\n");
goto map_bo_to_gpuvm_failed;
@@ -1418,6 +1621,337 @@ bo_reserve_failed:
return ret;
}
+/* Evict a userptr BO by stopping the queues if necessary
+ *
+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
+ * cannot do any memory allocations, and cannot take any locks that
+ * are held elsewhere while allocating memory. Therefore this is as
+ * simple as possible, using atomic counters.
+ *
+ * It doesn't do anything to the BO itself. The real work happens in
+ * restore, where we get updated page addresses. This function only
+ * ensures that GPU access to the BO is stopped.
+ */
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
+ struct mm_struct *mm)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ int invalid, evicted_bos;
+ int r = 0;
+
+ invalid = atomic_inc_return(&mem->invalid);
+ evicted_bos = atomic_inc_return(&process_info->evicted_bos);
+ if (evicted_bos == 1) {
+ /* First eviction, stop the queues */
+ r = kgd2kfd->quiesce_mm(mm);
+ if (r)
+ pr_err("Failed to quiesce KFD\n");
+ schedule_delayed_work(&process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+ }
+
+ return r;
+}
+
+/* Update invalid userptr BOs
+ *
+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
+ * userptr_inval_list and updates user pages for all BOs that have
+ * been invalidated since their last update.
+ */
+static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
+ struct mm_struct *mm)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ struct ttm_operation_ctx ctx = { false, false };
+ int invalid, ret;
+
+ /* Move all invalidated BOs to the userptr_inval_list and
+ * release their user pages by migration to the CPU domain
+ */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_valid_list,
+ validate_list.head) {
+ if (!atomic_read(&mem->invalid))
+ continue; /* BO is still valid */
+
+ bo = mem->bo;
+
+ if (amdgpu_bo_reserve(bo, true))
+ return -EAGAIN;
+ amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ amdgpu_bo_unreserve(bo);
+ if (ret) {
+ pr_err("%s: Failed to invalidate userptr BO\n",
+ __func__);
+ return -EAGAIN;
+ }
+
+ list_move_tail(&mem->validate_list.head,
+ &process_info->userptr_inval_list);
+ }
+
+ if (list_empty(&process_info->userptr_inval_list))
+ return 0; /* All evicted userptr BOs were freed */
+
+ /* Go through userptr_inval_list and update any invalid user_pages */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list.head) {
+ invalid = atomic_read(&mem->invalid);
+ if (!invalid)
+ /* BO hasn't been invalidated since the last
+ * revalidation attempt. Keep its BO list.
+ */
+ continue;
+
+ bo = mem->bo;
+
+ if (!mem->user_pages) {
+ mem->user_pages =
+ kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!mem->user_pages) {
+ pr_err("%s: Failed to allocate pages array\n",
+ __func__);
+ return -ENOMEM;
+ }
+ } else if (mem->user_pages[0]) {
+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+ }
+
+ /* Get updated user pages */
+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+ mem->user_pages);
+ if (ret) {
+ mem->user_pages[0] = NULL;
+ pr_info("%s: Failed to get user pages: %d\n",
+ __func__, ret);
+ /* Pretend it succeeded. It will fail later
+ * with a VM fault if the GPU tries to access
+ * it. Better than hanging indefinitely with
+ * stalled user mode queues.
+ */
+ }
+
+ /* Mark the BO as valid unless it was invalidated
+ * again concurrently
+ */
+ if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+/* Validate invalid userptr BOs
+ *
+ * Validates BOs on the userptr_inval_list, and moves them back to the
+ * userptr_valid_list. Also updates GPUVM page tables with new page
+ * addresses and waits for the page table updates to complete.
+ */
+static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
+{
+ struct amdgpu_bo_list_entry *pd_bo_list_entries;
+ struct list_head resv_list, duplicates;
+ struct ww_acquire_ctx ticket;
+ struct amdgpu_sync sync;
+
+ struct amdgpu_vm *peer_vm;
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ struct ttm_operation_ctx ctx = { false, false };
+ int i, ret;
+
+ pd_bo_list_entries = kcalloc(process_info->n_vms,
+ sizeof(struct amdgpu_bo_list_entry),
+ GFP_KERNEL);
+ if (!pd_bo_list_entries) {
+ pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&resv_list);
+ INIT_LIST_HEAD(&duplicates);
+
+ /* Get all the page directory BOs that need to be reserved */
+ i = 0;
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
+ &pd_bo_list_entries[i++]);
+ /* Add the userptr_inval_list entries to resv_list */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list.head) {
+ list_add_tail(&mem->resv_list.head, &resv_list);
+ mem->resv_list.bo = mem->validate_list.bo;
+ mem->resv_list.shared = mem->validate_list.shared;
+ }
+
+ /* Reserve all BOs and page tables for validation */
+ ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
+ WARN(!list_empty(&duplicates), "Duplicates should be empty");
+ if (ret)
+ goto out;
+
+ amdgpu_sync_create(&sync);
+
+ /* Avoid triggering eviction fences when unmapping invalid
+ * userptr BOs (waits for all fences, doesn't use
+ * FENCE_OWNER_VM)
+ */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
+ process_info->eviction_fence,
+ NULL, NULL);
+
+ ret = process_validate_vms(process_info);
+ if (ret)
+ goto unreserve_out;
+
+ /* Validate BOs and update GPUVM page tables */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list.head) {
+ struct kfd_bo_va_list *bo_va_entry;
+
+ bo = mem->bo;
+
+ /* Copy pages array and validate the BO if we got user pages */
+ if (mem->user_pages[0]) {
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
+ mem->user_pages);
+ amdgpu_ttm_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret) {
+ pr_err("%s: failed to validate BO\n", __func__);
+ goto unreserve_out;
+ }
+ }
+
+ /* Validate succeeded, now the BO owns the pages, free
+ * our copy of the pointer array. Put this BO back on
+ * the userptr_valid_list. If we need to revalidate
+ * it, we need to start from scratch.
+ */
+ kvfree(mem->user_pages);
+ mem->user_pages = NULL;
+ list_move_tail(&mem->validate_list.head,
+ &process_info->userptr_valid_list);
+
+ /* Update mapping. If the BO was not validated
+ * (because we couldn't get user pages), this will
+ * clear the page table entries, which will result in
+ * VM faults if the GPU tries to access the invalid
+ * memory.
+ */
+ list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
+ if (!bo_va_entry->is_mapped)
+ continue;
+
+ ret = update_gpuvm_pte((struct amdgpu_device *)
+ bo_va_entry->kgd_dev,
+ bo_va_entry, &sync);
+ if (ret) {
+ pr_err("%s: update PTE failed\n", __func__);
+ /* make sure this gets validated again */
+ atomic_inc(&mem->invalid);
+ goto unreserve_out;
+ }
+ }
+ }
+
+ /* Update page directories */
+ ret = process_update_pds(process_info, &sync);
+
+unreserve_out:
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_bo_fence(peer_vm->root.base.bo,
+ &process_info->eviction_fence->base, true);
+ ttm_eu_backoff_reservation(&ticket, &resv_list);
+ amdgpu_sync_wait(&sync, false);
+ amdgpu_sync_free(&sync);
+out:
+ kfree(pd_bo_list_entries);
+
+ return ret;
+}
+
+/* Worker callback to restore evicted userptr BOs
+ *
+ * Tries to update and validate all userptr BOs. If successful and no
+ * concurrent evictions happened, the queues are restarted. Otherwise,
+ * reschedule for another attempt later.
+ */
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info =
+ container_of(dwork, struct amdkfd_process_info,
+ restore_userptr_work);
+ struct task_struct *usertask;
+ struct mm_struct *mm;
+ int evicted_bos;
+
+ evicted_bos = atomic_read(&process_info->evicted_bos);
+ if (!evicted_bos)
+ return;
+
+ /* Reference task and mm in case of concurrent process termination */
+ usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
+ if (!usertask)
+ return;
+ mm = get_task_mm(usertask);
+ if (!mm) {
+ put_task_struct(usertask);
+ return;
+ }
+
+ mutex_lock(&process_info->lock);
+
+ if (update_invalid_user_pages(process_info, mm))
+ goto unlock_out;
+ /* userptr_inval_list can be empty if all evicted userptr BOs
+ * have been freed. In that case there is nothing to validate
+ * and we can just restart the queues.
+ */
+ if (!list_empty(&process_info->userptr_inval_list)) {
+ if (atomic_read(&process_info->evicted_bos) != evicted_bos)
+ goto unlock_out; /* Concurrent eviction, try again */
+
+ if (validate_invalid_user_pages(process_info))
+ goto unlock_out;
+ }
+ /* Final check for concurrent evicton and atomic update. If
+ * another eviction happens after successful update, it will
+ * be a first eviction that calls quiesce_mm. The eviction
+ * reference counting inside KFD will handle this case.
+ */
+ if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
+ evicted_bos)
+ goto unlock_out;
+ evicted_bos = 0;
+ if (kgd2kfd->resume_mm(mm)) {
+ pr_err("%s: Failed to resume KFD\n", __func__);
+ /* No recovery from this failure. Probably the CP is
+ * hanging. No point trying again.
+ */
+ }
+unlock_out:
+ mutex_unlock(&process_info->lock);
+ mmput(mm);
+ put_task_struct(usertask);
+
+ /* If validation failed, reschedule another attempt */
+ if (evicted_bos)
+ schedule_delayed_work(&process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+}
+
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
* KFD process identified by process_info
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index a0f48cb9b8f0..236915849cfe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -322,3 +322,47 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
return ret;
}
+
+union gfx_info {
+ struct atom_gfx_info_v2_4 v24;
+};
+
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ uint8_t frev, crev;
+ uint16_t data_offset;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ gfx_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union gfx_info *gfx_info = (union gfx_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (crev) {
+ case 4:
+ adev->gfx.config.max_shader_engines = gfx_info->v24.gc_num_se;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v24.gc_num_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v24.gc_num_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v24.gc_num_rb_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v24.gc_num_tccs;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth =
+ le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf =
+ gfx_info->v24.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ }
+ return -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 7689c961c4ef..20f158fd3b76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -30,5 +30,6 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 1ae5ae8c45a4..1bcb2b247335 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -550,7 +550,7 @@ static int amdgpu_atpx_init(void)
* look up whether we are the integrated or discrete GPU (all asics).
* Returns the client id.
*/
-static int amdgpu_atpx_get_client_id(struct pci_dev *pdev)
+static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev)
{
if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
return VGA_SWITCHEROO_IGD;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 02b849be083b..19cfff31f2e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -75,13 +75,20 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
{
struct amdgpu_bo *dobj = NULL;
struct amdgpu_bo *sobj = NULL;
+ struct amdgpu_bo_param bp;
uint64_t saddr, daddr;
int r, n;
int time;
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = sdomain;
+ bp.flags = 0;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
n = AMDGPU_BENCHMARK_ITERATIONS;
- r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0,
- ttm_bo_type_kernel, NULL, &sobj);
+ r = amdgpu_bo_create(adev, &bp, &sobj);
if (r) {
goto out_cleanup;
}
@@ -93,8 +100,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
if (r) {
goto out_cleanup;
}
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0,
- ttm_bo_type_kernel, NULL, &dobj);
+ bp.domain = ddomain;
+ r = amdgpu_bo_create(adev, &bp, &dobj);
if (r) {
goto out_cleanup;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 71a57b2f7f04..e950730f1933 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -23,7 +23,6 @@
*/
#include <linux/list.h>
#include <linux/slab.h>
-#include <linux/pci.h>
#include <drm/drmP.h>
#include <linux/firmware.h>
#include <drm/amdgpu_drm.h>
@@ -109,121 +108,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
WARN(1, "Invalid indirect register space");
}
-static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device,
- enum cgs_resource_type resource_type,
- uint64_t size,
- uint64_t offset,
- uint64_t *resource_base)
-{
- CGS_FUNC_ADEV;
-
- if (resource_base == NULL)
- return -EINVAL;
-
- switch (resource_type) {
- case CGS_RESOURCE_TYPE_MMIO:
- if (adev->rmmio_size == 0)
- return -ENOENT;
- if ((offset + size) > adev->rmmio_size)
- return -EINVAL;
- *resource_base = adev->rmmio_base;
- return 0;
- case CGS_RESOURCE_TYPE_DOORBELL:
- if (adev->doorbell.size == 0)
- return -ENOENT;
- if ((offset + size) > adev->doorbell.size)
- return -EINVAL;
- *resource_base = adev->doorbell.base;
- return 0;
- case CGS_RESOURCE_TYPE_FB:
- case CGS_RESOURCE_TYPE_IO:
- case CGS_RESOURCE_TYPE_ROM:
- default:
- return -EINVAL;
- }
-}
-
-static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device,
- unsigned table, uint16_t *size,
- uint8_t *frev, uint8_t *crev)
-{
- CGS_FUNC_ADEV;
- uint16_t data_start;
-
- if (amdgpu_atom_parse_data_header(
- adev->mode_info.atom_context, table, size,
- frev, crev, &data_start))
- return (uint8_t*)adev->mode_info.atom_context->bios +
- data_start;
-
- return NULL;
-}
-
-static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table,
- uint8_t *frev, uint8_t *crev)
-{
- CGS_FUNC_ADEV;
-
- if (amdgpu_atom_parse_cmd_header(
- adev->mode_info.atom_context, table,
- frev, crev))
- return 0;
-
- return -EINVAL;
-}
-
-static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table,
- void *args)
-{
- CGS_FUNC_ADEV;
-
- return amdgpu_atom_execute_table(
- adev->mode_info.atom_context, table, args);
-}
-
-static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
- enum amd_ip_block_type block_type,
- enum amd_clockgating_state state)
-{
- CGS_FUNC_ADEV;
- int i, r = -1;
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
-
- if (adev->ip_blocks[i].version->type == block_type) {
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
- (void *)adev,
- state);
- break;
- }
- }
- return r;
-}
-
-static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device,
- enum amd_ip_block_type block_type,
- enum amd_powergating_state state)
-{
- CGS_FUNC_ADEV;
- int i, r = -1;
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
-
- if (adev->ip_blocks[i].version->type == block_type) {
- r = adev->ip_blocks[i].version->funcs->set_powergating_state(
- (void *)adev,
- state);
- break;
- }
- }
- return r;
-}
-
-
static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
{
CGS_FUNC_ADEV;
@@ -271,18 +155,6 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
return result;
}
-static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type)
-{
- CGS_FUNC_ADEV;
- if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) {
- release_firmware(adev->pm.fw);
- adev->pm.fw = NULL;
- return 0;
- }
- /* cannot release other firmware because they are not created by cgs */
- return -EINVAL;
-}
-
static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
enum cgs_ucode_id type)
{
@@ -326,34 +198,6 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
return fw_version;
}
-static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device,
- bool en)
-{
- CGS_FUNC_ADEV;
-
- if (adev->gfx.rlc.funcs->enter_safe_mode == NULL ||
- adev->gfx.rlc.funcs->exit_safe_mode == NULL)
- return 0;
-
- if (en)
- adev->gfx.rlc.funcs->enter_safe_mode(adev);
- else
- adev->gfx.rlc.funcs->exit_safe_mode(adev);
-
- return 0;
-}
-
-static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device,
- bool lock)
-{
- CGS_FUNC_ADEV;
-
- if (lock)
- mutex_lock(&adev->grbm_idx_mutex);
- else
- mutex_unlock(&adev->grbm_idx_mutex);
-}
-
static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
enum cgs_ucode_id type,
struct cgs_firmware_info *info)
@@ -541,6 +385,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
case CHIP_POLARIS12:
strcpy(fw_name, "amdgpu/polaris12_smc.bin");
break;
+ case CHIP_VEGAM:
+ strcpy(fw_name, "amdgpu/vegam_smc.bin");
+ break;
case CHIP_VEGA10:
if ((adev->pdev->device == 0x687f) &&
((adev->pdev->revision == 0xc0) ||
@@ -553,6 +400,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
case CHIP_VEGA12:
strcpy(fw_name, "amdgpu/vega12_smc.bin");
break;
+ case CHIP_VEGA20:
+ strcpy(fw_name, "amdgpu/vega20_smc.bin");
+ break;
default:
DRM_ERROR("SMC firmware not supported\n");
return -EINVAL;
@@ -598,97 +448,12 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
return 0;
}
-static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device)
-{
- CGS_FUNC_ADEV;
- return amdgpu_sriov_vf(adev);
-}
-
-static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
- struct cgs_display_info *info)
-{
- CGS_FUNC_ADEV;
- struct cgs_mode_info *mode_info;
-
- if (info == NULL)
- return -EINVAL;
-
- mode_info = info->mode_info;
- if (mode_info)
- /* if the displays are off, vblank time is max */
- mode_info->vblank_time_us = 0xffffffff;
-
- if (!amdgpu_device_has_dc_support(adev)) {
- struct amdgpu_crtc *amdgpu_crtc;
- struct drm_device *ddev = adev->ddev;
- struct drm_crtc *crtc;
- uint32_t line_time_us, vblank_lines;
-
- if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
- list_for_each_entry(crtc,
- &ddev->mode_config.crtc_list, head) {
- amdgpu_crtc = to_amdgpu_crtc(crtc);
- if (crtc->enabled) {
- info->active_display_mask |= (1 << amdgpu_crtc->crtc_id);
- info->display_count++;
- }
- if (mode_info != NULL &&
- crtc->enabled && amdgpu_crtc->enabled &&
- amdgpu_crtc->hw_mode.clock) {
- line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) /
- amdgpu_crtc->hw_mode.clock;
- vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end -
- amdgpu_crtc->hw_mode.crtc_vdisplay +
- (amdgpu_crtc->v_border * 2);
- mode_info->vblank_time_us = vblank_lines * line_time_us;
- mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
- /* we have issues with mclk switching with refresh rates
- * over 120 hz on the non-DC code.
- */
- if (mode_info->refresh_rate > 120)
- mode_info->vblank_time_us = 0;
- mode_info = NULL;
- }
- }
- }
- } else {
- info->display_count = adev->pm.pm_display_cfg.num_display;
- if (mode_info != NULL) {
- mode_info->vblank_time_us = adev->pm.pm_display_cfg.min_vblank_time;
- mode_info->refresh_rate = adev->pm.pm_display_cfg.vrefresh;
- }
- }
- return 0;
-}
-
-
-static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled)
-{
- CGS_FUNC_ADEV;
-
- adev->pm.dpm_enabled = enabled;
-
- return 0;
-}
-
static const struct cgs_ops amdgpu_cgs_ops = {
.read_register = amdgpu_cgs_read_register,
.write_register = amdgpu_cgs_write_register,
.read_ind_register = amdgpu_cgs_read_ind_register,
.write_ind_register = amdgpu_cgs_write_ind_register,
- .get_pci_resource = amdgpu_cgs_get_pci_resource,
- .atom_get_data_table = amdgpu_cgs_atom_get_data_table,
- .atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs,
- .atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table,
.get_firmware_info = amdgpu_cgs_get_firmware_info,
- .rel_firmware = amdgpu_cgs_rel_firmware,
- .set_powergating_state = amdgpu_cgs_set_powergating_state,
- .set_clockgating_state = amdgpu_cgs_set_clockgating_state,
- .get_active_displays_info = amdgpu_cgs_get_active_displays_info,
- .notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled,
- .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
- .enter_safe_mode = amdgpu_cgs_enter_safe_mode,
- .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
};
struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 96501ff0e55b..8e66851eb427 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -691,7 +691,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector)
return ret;
}
-static int amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
@@ -843,7 +843,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector)
return ret;
}
-static int amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
@@ -1172,7 +1172,7 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector)
amdgpu_connector->use_digital = true;
}
-static int amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
@@ -1448,7 +1448,7 @@ out:
return ret;
}
-static int amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dc34b50e6b29..9c1d491d742e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -382,8 +382,7 @@ retry:
p->bytes_moved += ctx.bytes_moved;
if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
- bo->tbo.mem.mem_type == TTM_PL_VRAM &&
- bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
+ amdgpu_bo_in_cpu_visible_vram(bo))
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -411,7 +410,6 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *candidate = p->evictable;
struct amdgpu_bo *bo = candidate->robj;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- u64 initial_bytes_moved, bytes_moved;
bool update_bytes_moved_vis;
uint32_t other;
@@ -435,18 +433,14 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
continue;
/* Good we can try to move this BO somewhere else */
- amdgpu_ttm_placement_from_domain(bo, other);
update_bytes_moved_vis =
adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
- bo->tbo.mem.mem_type == TTM_PL_VRAM &&
- bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
- initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
+ amdgpu_bo_in_cpu_visible_vram(bo);
+ amdgpu_ttm_placement_from_domain(bo, other);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- bytes_moved = atomic64_read(&adev->num_bytes_moved) -
- initial_bytes_moved;
- p->bytes_moved += bytes_moved;
+ p->bytes_moved += ctx.bytes_moved;
if (update_bytes_moved_vis)
- p->bytes_moved_vis += bytes_moved;
+ p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r))
break;
@@ -536,7 +530,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (p->bo_list) {
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev);
+ p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
}
INIT_LIST_HEAD(&duplicates);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 3fabf9f97022..c5bb36275e93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -91,7 +91,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
continue;
r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
- rq, amdgpu_sched_jobs, &ctx->guilty);
+ rq, &ctx->guilty);
if (r)
goto failed;
}
@@ -111,8 +111,9 @@ failed:
return r;
}
-static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
+static void amdgpu_ctx_fini(struct kref *ref)
{
+ struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
@@ -125,13 +126,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
kfree(ctx->fences);
ctx->fences = NULL;
- for (i = 0; i < adev->num_rings; i++)
- drm_sched_entity_fini(&adev->rings[i]->sched,
- &ctx->rings[i].entity);
-
amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
mutex_destroy(&ctx->lock);
+
+ kfree(ctx);
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -170,12 +169,20 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
+ u32 i;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
- amdgpu_ctx_fini(ctx);
+ for (i = 0; i < ctx->adev->num_rings; i++) {
- kfree(ctx);
+ if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+ continue;
+
+ drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
+ &ctx->rings[i].entity);
+ }
+
+ amdgpu_ctx_fini(ref);
}
static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
@@ -437,16 +444,72 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
idr_init(&mgr->ctx_handles);
}
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
+{
+ struct amdgpu_ctx *ctx;
+ struct idr *idp;
+ uint32_t id, i;
+
+ idp = &mgr->ctx_handles;
+
+ idr_for_each_entry(idp, ctx, id) {
+
+ if (!ctx->adev)
+ return;
+
+ for (i = 0; i < ctx->adev->num_rings; i++) {
+
+ if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+ continue;
+
+ if (kref_read(&ctx->refcount) == 1)
+ drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
+ &ctx->rings[i].entity);
+ else
+ DRM_ERROR("ctx %p is still alive\n", ctx);
+ }
+ }
+}
+
+void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
+{
+ struct amdgpu_ctx *ctx;
+ struct idr *idp;
+ uint32_t id, i;
+
+ idp = &mgr->ctx_handles;
+
+ idr_for_each_entry(idp, ctx, id) {
+
+ if (!ctx->adev)
+ return;
+
+ for (i = 0; i < ctx->adev->num_rings; i++) {
+
+ if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+ continue;
+
+ if (kref_read(&ctx->refcount) == 1)
+ drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched,
+ &ctx->rings[i].entity);
+ else
+ DRM_ERROR("ctx %p is still alive\n", ctx);
+ }
+ }
+}
+
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id;
+ amdgpu_ctx_mgr_entity_cleanup(mgr);
+
idp = &mgr->ctx_handles;
idr_for_each_entry(idp, ctx, id) {
- if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1)
+ if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
DRM_ERROR("ctx %p is still alive\n", ctx);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 448d69fe3756..f5fb93795a69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -28,8 +28,13 @@
#include <linux/debugfs.h>
#include "amdgpu.h"
-/*
- * Debugfs
+/**
+ * amdgpu_debugfs_add_files - Add simple debugfs entries
+ *
+ * @adev: Device to attach debugfs entries to
+ * @files: Array of function callbacks that respond to reads
+ * @nfiles: Number of callbacks to register
+ *
*/
int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
const struct drm_info_list *files,
@@ -64,7 +69,33 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
#if defined(CONFIG_DEBUG_FS)
-
+/**
+ * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
+ *
+ * @read: True if reading
+ * @f: open file handle
+ * @buf: User buffer to write/read to
+ * @size: Number of bytes to write/read
+ * @pos: Offset to seek to
+ *
+ * This debugfs entry has special meaning on the offset being sought.
+ * Various bits have different meanings:
+ *
+ * Bit 62: Indicates a GRBM bank switch is needed
+ * Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is
+ * zero)
+ * Bits 24..33: The SE or ME selector if needed
+ * Bits 34..43: The SH (or SA) or PIPE selector if needed
+ * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
+ *
+ * Bit 23: Indicates that the PM power gating lock should be held
+ * This is necessary to read registers that might be
+ * unreliable during a power gating transistion.
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
char __user *buf, size_t size, loff_t *pos)
{
@@ -164,19 +195,37 @@ end:
return result;
}
-
+/**
+ * amdgpu_debugfs_regs_read - Callback for reading MMIO registers
+ */
static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
}
+/**
+ * amdgpu_debugfs_regs_write - Callback for writing MMIO registers
+ */
static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
}
+
+/**
+ * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -204,6 +253,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
@@ -232,6 +293,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
return result;
}
+/**
+ * amdgpu_debugfs_regs_didt_read - Read from a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -259,6 +332,18 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_debugfs_regs_didt_write - Write to a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
@@ -287,6 +372,18 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
return result;
}
+/**
+ * amdgpu_debugfs_regs_smc_read - Read from a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -314,6 +411,18 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_debugfs_regs_smc_write - Write to a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
@@ -342,6 +451,20 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
return result;
}
+/**
+ * amdgpu_debugfs_gca_config_read - Read from gfx config data
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * This file is used to access configuration data in a somewhat
+ * stable fashion. The format is a series of DWORDs with the first
+ * indicating which revision it is. New content is appended to the
+ * end so that older software can still read the data.
+ */
+
static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -418,6 +541,19 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_debugfs_sensor_read - Read from the powerplay sensors
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset is treated as the BYTE address of one of the sensors
+ * enumerated in amd/include/kgd_pp_interface.h under the
+ * 'amd_pp_sensors' enumeration. For instance to read the UVD VCLK
+ * you would use the offset 3 * 4 = 12.
+ */
static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -428,7 +564,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
if (size & 3 || *pos & 0x3)
return -EINVAL;
- if (amdgpu_dpm == 0)
+ if (!adev->pm.dpm_enabled)
return -EINVAL;
/* convert offset to sensor number */
@@ -457,6 +593,27 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
return !r ? outsize : r;
}
+/** amdgpu_debugfs_wave_read - Read WAVE STATUS data
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset being sought changes which wave that the status data
+ * will be returned for. The bits are used as follows:
+ *
+ * Bits 0..6: Byte offset into data
+ * Bits 7..14: SE selector
+ * Bits 15..22: SH/SA selector
+ * Bits 23..30: CU/{WGP+SIMD} selector
+ * Bits 31..36: WAVE ID selector
+ * Bits 37..44: SIMD ID selector
+ *
+ * The returned data begins with one DWORD of version information
+ * Followed by WAVE STATUS registers relevant to the GFX IP version
+ * being used. See gfx_v8_0_read_wave_data() for an example output.
+ */
static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -507,6 +664,28 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
return result;
}
+/** amdgpu_debugfs_gpr_read - Read wave gprs
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset being sought changes which wave that the status data
+ * will be returned for. The bits are used as follows:
+ *
+ * Bits 0..11: Byte offset into data
+ * Bits 12..19: SE selector
+ * Bits 20..27: SH/SA selector
+ * Bits 28..35: CU/{WGP+SIMD} selector
+ * Bits 36..43: WAVE ID selector
+ * Bits 37..44: SIMD ID selector
+ * Bits 52..59: Thread selector
+ * Bits 60..61: Bank selector (VGPR=0,SGPR=1)
+ *
+ * The return data comes from the SGPR or VGPR register bank for
+ * the selected operational unit.
+ */
static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -637,6 +816,12 @@ static const char *debugfs_regs_names[] = {
"amdgpu_gpr",
};
+/**
+ * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
+ * register access.
+ *
+ * @adev: The device to attach the debugfs entries to
+ */
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
struct drm_minor *minor = adev->ddev->primary;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 34af664b9f93..290e279abf0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -83,8 +83,10 @@ static const char *amdgpu_asic_name[] = {
"POLARIS10",
"POLARIS11",
"POLARIS12",
+ "VEGAM",
"VEGA10",
"VEGA12",
+ "VEGA20",
"RAVEN",
"LAST",
};
@@ -690,6 +692,8 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
{
u64 size_af, size_bf;
+ mc->gart_size += adev->pm.smu_prv_buffer_size;
+
size_af = adev->gmc.mc_mask - mc->vram_end;
size_bf = mc->vram_start;
if (size_bf > size_af) {
@@ -907,6 +911,46 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
}
}
+static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
+{
+ struct sysinfo si;
+ bool is_os_64 = (sizeof(void *) == 8) ? true : false;
+ uint64_t total_memory;
+ uint64_t dram_size_seven_GB = 0x1B8000000;
+ uint64_t dram_size_three_GB = 0xB8000000;
+
+ if (amdgpu_smu_memory_pool_size == 0)
+ return;
+
+ if (!is_os_64) {
+ DRM_WARN("Not 64-bit OS, feature not supported\n");
+ goto def_value;
+ }
+ si_meminfo(&si);
+ total_memory = (uint64_t)si.totalram * si.mem_unit;
+
+ if ((amdgpu_smu_memory_pool_size == 1) ||
+ (amdgpu_smu_memory_pool_size == 2)) {
+ if (total_memory < dram_size_three_GB)
+ goto def_value1;
+ } else if ((amdgpu_smu_memory_pool_size == 4) ||
+ (amdgpu_smu_memory_pool_size == 8)) {
+ if (total_memory < dram_size_seven_GB)
+ goto def_value1;
+ } else {
+ DRM_WARN("Smu memory pool size not supported\n");
+ goto def_value;
+ }
+ adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
+
+ return;
+
+def_value1:
+ DRM_WARN("No enough system memory\n");
+def_value:
+ adev->pm.smu_prv_buffer_size = 0;
+}
+
/**
* amdgpu_device_check_arguments - validate module params
*
@@ -948,6 +992,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_vm_fragment_size = -1;
}
+ amdgpu_device_check_smu_prv_buffer_size(adev);
+
amdgpu_device_check_vm_size(adev);
amdgpu_device_check_block_size(adev);
@@ -1039,10 +1085,11 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
* the hardware IP specified.
* Returns the error code from the last instance.
*/
-int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_clockgating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state)
{
+ struct amdgpu_device *adev = dev;
int i, r = 0;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1072,10 +1119,11 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
* the hardware IP specified.
* Returns the error code from the last instance.
*/
-int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_powergating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_powergating_state state)
{
+ struct amdgpu_device *adev = dev;
int i, r = 0;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1320,9 +1368,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
case CHIP_CARRIZO:
case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
@@ -1339,6 +1388,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_KABINI:
case CHIP_MULLINS:
#endif
+ case CHIP_VEGA20:
default:
return 0;
case CHIP_VEGA10:
@@ -1428,9 +1478,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
case CHIP_TOPAZ:
case CHIP_TONGA:
case CHIP_FIJI:
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
case CHIP_CARRIZO:
case CHIP_STONEY:
if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
@@ -1472,6 +1523,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
#endif
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
if (adev->asic_type == CHIP_RAVEN)
adev->family = AMDGPU_FAMILY_RV;
@@ -1499,6 +1551,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return -EAGAIN;
}
+ adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
DRM_ERROR("disabled ip block: %d <%s>\n",
@@ -1654,12 +1708,17 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
if (amdgpu_emu_mode == 1)
return 0;
+ r = amdgpu_ib_ring_tests(adev);
+ if (r)
+ DRM_ERROR("ib ring test failed (%d).\n", r);
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1704,8 +1763,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
}
}
- mod_delayed_work(system_wq, &adev->late_init_work,
- msecs_to_jiffies(AMDGPU_RESUME_MS));
+ queue_delayed_work(system_wq, &adev->late_init_work,
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
amdgpu_device_fill_reset_magic(adev);
@@ -1759,6 +1818,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* ungate blocks before hw fini so that we can shutdown the blocks safely */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1850,6 +1910,12 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
+ /* ungate SMC block powergating */
+ if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
+ amdgpu_device_ip_set_powergating_state(adev,
+ AMD_IP_BLOCK_TYPE_SMC,
+ AMD_CG_STATE_UNGATE);
+
/* ungate SMC block first */
r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
AMD_CG_STATE_UNGATE);
@@ -2086,16 +2152,15 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_MULLINS:
case CHIP_CARRIZO:
case CHIP_STONEY:
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
case CHIP_TONGA:
case CHIP_FIJI:
-#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
- return amdgpu_dc != 0;
-#endif
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
case CHIP_RAVEN:
#endif
@@ -2375,10 +2440,6 @@ fence_driver_init:
goto failed;
}
- r = amdgpu_ib_ring_tests(adev);
- if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
-
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
@@ -2539,7 +2600,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
/* unpin the front buffers and cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb);
+ struct drm_framebuffer *fb = crtc->primary->fb;
struct amdgpu_bo *robj;
if (amdgpu_crtc->cursor_bo) {
@@ -2551,10 +2612,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
}
}
- if (rfb == NULL || rfb->obj == NULL) {
+ if (fb == NULL || fb->obj[0] == NULL) {
continue;
}
- robj = gem_to_amdgpu_bo(rfb->obj);
+ robj = gem_to_amdgpu_bo(fb->obj[0]);
/* don't unpin kernel fb objects */
if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
r = amdgpu_bo_reserve(robj, true);
@@ -2640,11 +2701,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
}
amdgpu_fence_driver_resume(adev);
- if (resume) {
- r = amdgpu_ib_ring_tests(adev);
- if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
- }
r = amdgpu_device_ip_late_init(adev);
if (r)
@@ -2736,6 +2792,9 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return true;
+ if (amdgpu_asic_need_full_reset(adev))
+ return true;
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -2792,6 +2851,9 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
{
int i;
+ if (amdgpu_asic_need_full_reset(adev))
+ return true;
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -3087,20 +3149,19 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
/* now we are okay to resume SMC/CP/SDMA */
r = amdgpu_device_ip_reinit_late_sriov(adev);
- amdgpu_virt_release_full_gpu(adev, true);
if (r)
goto error;
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
+error:
+ amdgpu_virt_release_full_gpu(adev, true);
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
atomic_inc(&adev->vram_lost_counter);
r = amdgpu_device_handle_vram_lost(adev);
}
-error:
-
return r;
}
@@ -3117,7 +3178,6 @@ error:
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job, bool force)
{
- struct drm_atomic_state *state = NULL;
int i, r, resched;
if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
@@ -3140,10 +3200,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
- /* store modesetting */
- if (amdgpu_device_has_dc_support(adev))
- state = drm_atomic_helper_suspend(adev->ddev);
-
/* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -3183,10 +3239,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
kthread_unpark(ring->sched.thread);
}
- if (amdgpu_device_has_dc_support(adev)) {
- if (drm_atomic_helper_resume(adev->ddev, state))
- dev_info(adev->dev, "drm resume failed:%d\n", r);
- } else {
+ if (!amdgpu_device_has_dc_support(adev)) {
drm_helper_resume_force_mode(adev->ddev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 93f700ab1bfb..76ee8e04ff11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fb_helper.h>
static void amdgpu_display_flip_callback(struct dma_fence *f,
@@ -151,8 +152,6 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_framebuffer *old_amdgpu_fb;
- struct amdgpu_framebuffer *new_amdgpu_fb;
struct drm_gem_object *obj;
struct amdgpu_flip_work *work;
struct amdgpu_bo *new_abo;
@@ -174,15 +173,13 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
/* schedule unpin of the old buffer */
- old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- obj = old_amdgpu_fb->obj;
+ obj = crtc->primary->fb->obj[0];
/* take a reference to the old object */
work->old_abo = gem_to_amdgpu_bo(obj);
amdgpu_bo_ref(work->old_abo);
- new_amdgpu_fb = to_amdgpu_framebuffer(fb);
- obj = new_amdgpu_fb->obj;
+ obj = fb->obj[0];
new_abo = gem_to_amdgpu_bo(obj);
/* pin the new buffer */
@@ -192,7 +189,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
goto cleanup;
}
- r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base);
+ r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev), &base);
if (unlikely(r != 0)) {
DRM_ERROR("failed to pin new abo buffer before flip\n");
goto unreserve;
@@ -482,31 +479,12 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
return true;
}
-static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb)
-{
- struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
-
- drm_gem_object_put_unlocked(amdgpu_fb->obj);
- drm_framebuffer_cleanup(fb);
- kfree(amdgpu_fb);
-}
-
-static int amdgpu_display_user_framebuffer_create_handle(
- struct drm_framebuffer *fb,
- struct drm_file *file_priv,
- unsigned int *handle)
-{
- struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
-
- return drm_gem_handle_create(file_priv, amdgpu_fb->obj, handle);
-}
-
static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
- .destroy = amdgpu_display_user_framebuffer_destroy,
- .create_handle = amdgpu_display_user_framebuffer_create_handle,
+ .destroy = drm_gem_fb_destroy,
+ .create_handle = drm_gem_fb_create_handle,
};
-uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev)
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev)
{
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
@@ -526,11 +504,11 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
struct drm_gem_object *obj)
{
int ret;
- rfb->obj = obj;
+ rfb->base.obj[0] = obj;
drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
if (ret) {
- rfb->obj = NULL;
+ rfb->base.obj[0] = NULL;
return ret;
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 2b11d808f297..f66e3e3fef0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,7 +23,7 @@
#ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__
-uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev);
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index e997ebbe43ea..def1010ac05e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -115,6 +115,26 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
pr_cont("\n");
}
+void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev)
+{
+ struct drm_device *ddev = adev->ddev;
+ struct drm_crtc *crtc;
+ struct amdgpu_crtc *amdgpu_crtc;
+
+ adev->pm.dpm.new_active_crtcs = 0;
+ adev->pm.dpm.new_active_crtc_count = 0;
+ if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
+ list_for_each_entry(crtc,
+ &ddev->mode_config.crtc_list, head) {
+ amdgpu_crtc = to_amdgpu_crtc(crtc);
+ if (amdgpu_crtc->enabled) {
+ adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
+ adev->pm.dpm.new_active_crtc_count++;
+ }
+ }
+ }
+}
+
u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 643d008410c6..dd6203a0a6b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -52,8 +52,6 @@ enum amdgpu_dpm_event_src {
AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4
};
-#define SCLK_DEEP_SLEEP_MASK 0x8
-
struct amdgpu_ps {
u32 caps; /* vbios flags */
u32 class; /* vbios flags */
@@ -349,12 +347,6 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
(adev)->powerplay.pp_handle, msg_id))
-#define amdgpu_dpm_notify_smu_memory_info(adev, virtual_addr_low, \
- virtual_addr_hi, mc_addr_low, mc_addr_hi, size) \
- ((adev)->powerplay.pp_funcs->notify_smu_memory_info)( \
- (adev)->powerplay.pp_handle, virtual_addr_low, \
- virtual_addr_hi, mc_addr_low, mc_addr_hi, size)
-
#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
((adev)->powerplay.pp_funcs->get_power_profile_mode(\
(adev)->powerplay.pp_handle, buf))
@@ -445,6 +437,8 @@ struct amdgpu_pm {
uint32_t pcie_gen_mask;
uint32_t pcie_mlw_mask;
struct amd_pp_display_configuration pm_display_cfg;/* set by dc */
+ uint32_t smu_prv_buffer_size;
+ struct amdgpu_bo *smu_prv_buffer;
};
#define R600_SSTU_DFLT 0
@@ -482,6 +476,7 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
struct amdgpu_ps *rps);
u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev);
+void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev);
bool amdgpu_is_uvd_state(u32 class, u32 class2);
void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
u32 *p, u32 *u);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0b19482b36b8..b0bf2f24da48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -75,9 +75,10 @@
* - 3.23.0 - Add query for VRAM lost counter
* - 3.24.0 - Add high priority compute support for gfx9
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
+ * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 25
+#define KMS_DRIVER_MINOR 26
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@@ -121,7 +122,7 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
-uint amdgpu_pp_feature_mask = 0xffffbfff;
+uint amdgpu_pp_feature_mask = 0xffff3fff; /* gfxoff (bit 15) disabled by default */
int amdgpu_ngg = 0;
int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0;
@@ -132,6 +133,7 @@ int amdgpu_lbpw = -1;
int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
int amdgpu_emu_mode = 0;
+uint amdgpu_smu_memory_pool_size = 0;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -316,6 +318,11 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)
module_param_named(cik_support, amdgpu_cik_support, int, 0444);
#endif
+MODULE_PARM_DESC(smu_memory_pool_size,
+ "reserve gtt for smu debug usage, 0 = disable,"
+ "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
+module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
+
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -534,6 +541,9 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ /* VEGAM */
+ {0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
+ {0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
/* Vega 10 */
{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
@@ -550,6 +560,13 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
{0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
{0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ /* Vega 20 */
+ {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
/* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 12063019751b..bc5fd8ebab5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -137,7 +137,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
/* need to align pitch with crtc limits */
mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
fb_tiled);
- domain = amdgpu_display_framebuffer_domains(adev);
+ domain = amdgpu_display_supported_domains(adev);
height = ALIGN(mode_cmd->height, 8);
size = mode_cmd->pitches[0] * height;
@@ -292,9 +292,9 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb
drm_fb_helper_unregister_fbi(&rfbdev->helper);
- if (rfb->obj) {
- amdgpufb_destroy_pinned_object(rfb->obj);
- rfb->obj = NULL;
+ if (rfb->base.obj[0]) {
+ amdgpufb_destroy_pinned_object(rfb->base.obj[0]);
+ rfb->base.obj[0] = NULL;
drm_framebuffer_unregister_private(&rfb->base);
drm_framebuffer_cleanup(&rfb->base);
}
@@ -377,7 +377,7 @@ int amdgpu_fbdev_total_size(struct amdgpu_device *adev)
if (!adev->mode_info.rfbdev)
return 0;
- robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj);
+ robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]);
size += amdgpu_bo_size(robj);
return size;
}
@@ -386,7 +386,7 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
{
if (!adev->mode_info.rfbdev)
return false;
- if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj))
+ if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]))
return true;
return false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 97449e06a242..39ec6b8890a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
* Emits a fence command on the requested ring (all asics).
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+ unsigned flags)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_fence *fence;
@@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
adev->fence_context + ring->idx,
seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
- seq, AMDGPU_FENCE_FLAG_INT);
+ seq, flags | AMDGPU_FENCE_FLAG_INT);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
/* This function can't be called concurrently anyway, otherwise
@@ -375,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
uint64_t index;
- if (ring != &adev->uvd.ring) {
+ if (ring != &adev->uvd.inst[ring->me].ring) {
ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
} else {
/* put fence directly behind firmware */
index = ALIGN(adev->uvd.fw->size, 8);
- ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
- ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
+ ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
+ ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
}
amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
amdgpu_irq_get(adev, irq_src, irq_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index cf0f186c6092..17d6b9fb6d77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -113,12 +113,17 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
int r;
if (adev->gart.robj == NULL) {
- r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- ttm_bo_type_kernel, NULL,
- &adev->gart.robj);
+ struct amdgpu_bo_param bp;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = adev->gart.table_size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ r = amdgpu_bo_create(adev, &bp, &adev->gart.robj);
if (r) {
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 46b9ea4e6103..2c8e27370284 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -48,17 +48,25 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
struct drm_gem_object **obj)
{
struct amdgpu_bo *bo;
+ struct amdgpu_bo_param bp;
int r;
+ memset(&bp, 0, sizeof(bp));
*obj = NULL;
/* At least align on page size */
if (alignment < PAGE_SIZE) {
alignment = PAGE_SIZE;
}
+ bp.size = size;
+ bp.byte_align = alignment;
+ bp.type = type;
+ bp.resv = resv;
+ bp.preferred_domain = initial_domain;
retry:
- r = amdgpu_bo_create(adev, size, alignment, initial_domain,
- flags, type, resv, &bo);
+ bp.flags = flags;
+ bp.domain = initial_domain;
+ r = amdgpu_bo_create(adev, &bp, &bo);
if (r) {
if (r != -ERESTARTSYS) {
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
@@ -221,12 +229,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
/* reject invalid gem domains */
- if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
- AMDGPU_GEM_DOMAIN_GTT |
- AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GDS |
- AMDGPU_GEM_DOMAIN_GWS |
- AMDGPU_GEM_DOMAIN_OA))
+ if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
return -EINVAL;
/* create a gem object to contain this object in */
@@ -771,16 +774,23 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
}
#if defined(CONFIG_DEBUG_FS)
+
+#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag) \
+ if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \
+ seq_printf((m), " " #flag); \
+ }
+
static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
{
struct drm_gem_object *gobj = ptr;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct seq_file *m = data;
+ struct dma_buf_attachment *attachment;
+ struct dma_buf *dma_buf;
unsigned domain;
const char *placement;
unsigned pin_count;
- uint64_t offset;
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
switch (domain) {
@@ -798,13 +808,27 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
seq_printf(m, "\t0x%08x: %12ld byte %s",
id, amdgpu_bo_size(bo), placement);
- offset = READ_ONCE(bo->tbo.mem.start);
- if (offset != AMDGPU_BO_INVALID_OFFSET)
- seq_printf(m, " @ 0x%010Lx", offset);
-
pin_count = READ_ONCE(bo->pin_count);
if (pin_count)
seq_printf(m, " pin count %d", pin_count);
+
+ dma_buf = READ_ONCE(bo->gem_base.dma_buf);
+ attachment = READ_ONCE(bo->gem_base.import_attach);
+
+ if (attachment)
+ seq_printf(m, " imported from %p", dma_buf);
+ else if (dma_buf)
+ seq_printf(m, " exported as %p", dma_buf);
+
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID);
+ amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC);
+
seq_printf(m, "\n");
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 311589e02d17..f70eeed9ed76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
struct amdgpu_vm *vm;
uint64_t fence_ctx;
uint32_t status = 0, alloc_size;
+ unsigned fence_flags = 0;
unsigned i;
int r = 0;
@@ -227,7 +228,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
#endif
amdgpu_asic_invalidate_hdp(adev, ring);
- r = amdgpu_fence_emit(ring, f);
+ if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
+ fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+ r = amdgpu_fence_emit(ring, f, fence_flags);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
if (job && job->vmid)
@@ -242,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
/* wrap the last IB with fence */
if (job && job->uf_addr) {
amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
- AMDGPU_FENCE_FLAG_64BIT);
+ fence_flags | AMDGPU_FENCE_FLAG_64BIT);
}
if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 4b7824d30e73..91517b166a3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -31,6 +31,7 @@
#include "amdgpu_sched.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
+#include "atom.h"
#include <linux/vga_switcheroo.h>
#include <linux/slab.h>
@@ -214,6 +215,18 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->gfx.rlc_fw_version;
fw_info->feature = adev->gfx.rlc_feature_version;
break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL:
+ fw_info->ver = adev->gfx.rlc_srlc_fw_version;
+ fw_info->feature = adev->gfx.rlc_srlc_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM:
+ fw_info->ver = adev->gfx.rlc_srlg_fw_version;
+ fw_info->feature = adev->gfx.rlc_srlg_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM:
+ fw_info->ver = adev->gfx.rlc_srls_fw_version;
+ fw_info->feature = adev->gfx.rlc_srls_feature_version;
+ break;
case AMDGPU_INFO_FW_GFX_MEC:
if (query_fw->index == 0) {
fw_info->ver = adev->gfx.mec_fw_version;
@@ -273,12 +286,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_crtc *crtc;
uint32_t ui32 = 0;
uint64_t ui64 = 0;
- int i, found;
+ int i, j, found;
int ui32_size = sizeof(ui32);
if (!info->return_size || !info->return_pointer)
return -EINVAL;
+ /* Ensure IB tests are run on ring */
+ flush_delayed_work(&adev->late_init_work);
+
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
ui32 = adev->accel_working;
@@ -332,7 +348,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
break;
case AMDGPU_HW_IP_UVD:
type = AMD_IP_BLOCK_TYPE_UVD;
- ring_mask = adev->uvd.ring.ready ? 1 : 0;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++)
+ ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i);
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 16;
break;
@@ -345,8 +362,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
- for (i = 0; i < adev->uvd.num_enc_rings; i++)
- ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i);
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++)
+ for (j = 0; j < adev->uvd.num_enc_rings; j++)
+ ring_mask |=
+ ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) <<
+ (j + i * adev->uvd.num_enc_rings));
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 1;
break;
@@ -701,10 +721,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
}
}
case AMDGPU_INFO_SENSOR: {
- struct pp_gpu_power query = {0};
- int query_size = sizeof(query);
-
- if (amdgpu_dpm == 0)
+ if (!adev->pm.dpm_enabled)
return -ENOENT;
switch (info->sensor_info.type) {
@@ -746,10 +763,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
/* get average GPU power */
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_GPU_POWER,
- (void *)&query, &query_size)) {
+ (void *)&ui32, &ui32_size)) {
return -EINVAL;
}
- ui32 = query.average_gpu_power >> 8;
+ ui32 >>= 8;
break;
case AMDGPU_INFO_SENSOR_VDDNB:
/* get VDDNB in millivolts */
@@ -913,8 +930,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
return;
pm_runtime_get_sync(dev->dev);
-
- amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+ amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr);
if (adev->asic_type != CHIP_RAVEN) {
amdgpu_uvd_free_handles(adev, file_priv);
@@ -935,6 +951,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
amdgpu_vm_fini(adev, &fpriv->vm);
+ amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+
if (pasid)
amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
amdgpu_bo_unref(&pd);
@@ -1088,6 +1106,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
struct amdgpu_device *adev = dev->dev_private;
struct drm_amdgpu_info_firmware fw_info;
struct drm_amdgpu_query_fw query_fw;
+ struct atom_context *ctx = adev->mode_info.atom_context;
int ret, i;
/* VCE */
@@ -1146,6 +1165,30 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
+ /* RLC SAVE RESTORE LIST CNTL */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC SAVE RESTORE LIST GPM MEM */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC SAVE RESTORE LIST SRM MEM */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
/* MEC */
query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
query_fw.index = 0;
@@ -1210,6 +1253,9 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
+
+ seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index bd67f4cb8e6c..83e344fbb50a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -36,12 +36,14 @@
#include <drm/drm.h>
#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
struct mmu_notifier mn;
+ enum amdgpu_mn_type type;
/* only used on destruction */
struct work_struct work;
@@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
}
/**
- * amdgpu_mn_invalidate_range_start - callback to notify about mm change
+ * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
* @mn: our notifier
* @mn: the mm this callback is about
@@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
* We block for all BOs between start and end to be idle and
* unmap them by move them into system domain again.
*/
-static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
{
struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
@@ -220,6 +222,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
}
/**
+ * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mn: the mm this callback is about
+ * @start: start of updated range
+ * @end: end of updated range
+ *
+ * We temporarily evict all BOs between start and end. This
+ * necessitates evicting all user-mode queues of the process. The BOs
+ * are restorted in amdgpu_mn_invalidate_range_end_hsa.
+ */
+static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+ struct interval_tree_node *it;
+
+ /* notification is exclusive, but interval is inclusive */
+ end -= 1;
+
+ amdgpu_mn_read_lock(rmn);
+
+ it = interval_tree_iter_first(&rmn->objects, start, end);
+ while (it) {
+ struct amdgpu_mn_node *node;
+ struct amdgpu_bo *bo;
+
+ node = container_of(it, struct amdgpu_mn_node, it);
+ it = interval_tree_iter_next(it, start, end);
+
+ list_for_each_entry(bo, &node->bos, mn_list) {
+ struct kgd_mem *mem = bo->kfd_bo;
+
+ if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+ start, end))
+ amdgpu_amdkfd_evict_userptr(mem, mm);
+ }
+ }
+}
+
+/**
* amdgpu_mn_invalidate_range_end - callback to notify about mm change
*
* @mn: our notifier
@@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
amdgpu_mn_read_unlock(rmn);
}
-static const struct mmu_notifier_ops amdgpu_mn_ops = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+ [AMDGPU_MN_TYPE_GFX] = {
+ .release = amdgpu_mn_release,
+ .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
+ .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ },
+ [AMDGPU_MN_TYPE_HSA] = {
+ .release = amdgpu_mn_release,
+ .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
+ .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ },
};
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
+ */
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
+
/**
* amdgpu_mn_get - create notifier context
*
* @adev: amdgpu device pointer
+ * @type: type of MMU notifier context
*
* Creates a notifier context for current->mm.
*/
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type)
{
struct mm_struct *mm = current->mm;
struct amdgpu_mn *rmn;
+ unsigned long key = AMDGPU_MN_KEY(mm, type);
int r;
mutex_lock(&adev->mn_lock);
@@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
return ERR_PTR(-EINTR);
}
- hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
- if (rmn->mm == mm)
+ hash_for_each_possible(adev->mn_hash, rmn, node, key)
+ if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
goto release_locks;
rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
@@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
rmn->adev = adev;
rmn->mm = mm;
- rmn->mn.ops = &amdgpu_mn_ops;
init_rwsem(&rmn->lock);
+ rmn->type = type;
+ rmn->mn.ops = &amdgpu_mn_ops[type];
rmn->objects = RB_ROOT_CACHED;
mutex_init(&rmn->read_lock);
atomic_set(&rmn->recursion, 0);
@@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
if (r)
goto free_rmn;
- hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
+ hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
release_locks:
up_write(&mm->mmap_sem);
@@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
unsigned long end = addr + amdgpu_bo_size(bo) - 1;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ enum amdgpu_mn_type type =
+ bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
struct amdgpu_mn *rmn;
- struct amdgpu_mn_node *node = NULL;
+ struct amdgpu_mn_node *node = NULL, *new_node;
struct list_head bos;
struct interval_tree_node *it;
- rmn = amdgpu_mn_get(adev);
+ rmn = amdgpu_mn_get(adev, type);
if (IS_ERR(rmn))
return PTR_ERR(rmn);
+ new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node)
+ return -ENOMEM;
+
INIT_LIST_HEAD(&bos);
down_write(&rmn->lock);
@@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
list_splice(&node->bos, &bos);
}
- if (!node) {
- node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
- if (!node) {
- up_write(&rmn->lock);
- return -ENOMEM;
- }
- }
+ if (!node)
+ node = new_node;
+ else
+ kfree(new_node);
bo->mn = rmn;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index d0095a3793b8..eb0f432f78fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -29,16 +29,23 @@
*/
struct amdgpu_mn;
+enum amdgpu_mn_type {
+ AMDGPU_MN_TYPE_GFX,
+ AMDGPU_MN_TYPE_HSA,
+};
+
#if defined(CONFIG_MMU_NOTIFIER)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev);
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
#else
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type)
{
return NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index d6416ee52e32..b9e9e8b02fb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -308,7 +308,6 @@ struct amdgpu_display_funcs {
struct amdgpu_framebuffer {
struct drm_framebuffer base;
- struct drm_gem_object *obj;
/* caching for later use */
uint64_t address;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6d08cde8443c..6a9e46ae7f0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -191,14 +191,21 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr)
{
+ struct amdgpu_bo_param bp;
bool free = false;
int r;
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = align;
+ bp.domain = domain;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+
if (!*bo_ptr) {
- r = amdgpu_bo_create(adev, size, align, domain,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- ttm_bo_type_kernel, NULL, bo_ptr);
+ r = amdgpu_bo_create(adev, &bp, bo_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
r);
@@ -341,27 +348,25 @@ fail:
return false;
}
-static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
- int byte_align, u32 domain,
- u64 flags, enum ttm_bo_type type,
- struct reservation_object *resv,
+static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
{
struct ttm_operation_ctx ctx = {
- .interruptible = (type != ttm_bo_type_kernel),
+ .interruptible = (bp->type != ttm_bo_type_kernel),
.no_wait_gpu = false,
- .resv = resv,
+ .resv = bp->resv,
.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
};
struct amdgpu_bo *bo;
- unsigned long page_align;
+ unsigned long page_align, size = bp->size;
size_t acc_size;
int r;
- page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
+ page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
size = ALIGN(size, PAGE_SIZE);
- if (!amdgpu_bo_validate_size(adev, size, domain))
+ if (!amdgpu_bo_validate_size(adev, size, bp->domain))
return -ENOMEM;
*bo_ptr = NULL;
@@ -375,18 +380,14 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
INIT_LIST_HEAD(&bo->shadow_list);
INIT_LIST_HEAD(&bo->va);
- bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT |
- AMDGPU_GEM_DOMAIN_CPU |
- AMDGPU_GEM_DOMAIN_GDS |
- AMDGPU_GEM_DOMAIN_GWS |
- AMDGPU_GEM_DOMAIN_OA);
+ bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
+ bp->domain;
bo->allowed_domains = bo->preferred_domains;
- if (type != ttm_bo_type_kernel &&
+ if (bp->type != ttm_bo_type_kernel &&
bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
- bo->flags = flags;
+ bo->flags = bp->flags;
#ifdef CONFIG_X86_32
/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
@@ -417,11 +418,13 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
#endif
bo->tbo.bdev = &adev->mman.bdev;
- amdgpu_ttm_placement_from_domain(bo, domain);
+ amdgpu_ttm_placement_from_domain(bo, bp->domain);
+ if (bp->type == ttm_bo_type_kernel)
+ bo->tbo.priority = 1;
- r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
+ r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,
&bo->placement, page_align, &ctx, acc_size,
- NULL, resv, &amdgpu_ttm_bo_destroy);
+ NULL, bp->resv, &amdgpu_ttm_bo_destroy);
if (unlikely(r != 0))
return r;
@@ -433,10 +436,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
else
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
- if (type == ttm_bo_type_kernel)
- bo->tbo.priority = 1;
-
- if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
+ if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
struct dma_fence *fence;
@@ -449,20 +449,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
bo->tbo.moving = dma_fence_get(fence);
dma_fence_put(fence);
}
- if (!resv)
+ if (!bp->resv)
amdgpu_bo_unreserve(bo);
*bo_ptr = bo;
trace_amdgpu_bo_create(bo);
/* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
- if (type == ttm_bo_type_device)
+ if (bp->type == ttm_bo_type_device)
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
return 0;
fail_unreserve:
- if (!resv)
+ if (!bp->resv)
ww_mutex_unlock(&bo->tbo.resv->lock);
amdgpu_bo_unref(&bo);
return r;
@@ -472,16 +472,22 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
unsigned long size, int byte_align,
struct amdgpu_bo *bo)
{
+ struct amdgpu_bo_param bp;
int r;
if (bo->shadow)
return 0;
- r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_CREATE_CPU_GTT_USWC |
- AMDGPU_GEM_CREATE_SHADOW,
- ttm_bo_type_kernel,
- bo->tbo.resv, &bo->shadow);
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = byte_align;
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_SHADOW;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = bo->tbo.resv;
+
+ r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo);
mutex_lock(&adev->shadow_list_lock);
@@ -492,28 +498,26 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
return r;
}
-int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
- int byte_align, u32 domain,
- u64 flags, enum ttm_bo_type type,
- struct reservation_object *resv,
+int amdgpu_bo_create(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
{
- uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
+ u64 flags = bp->flags;
int r;
- r = amdgpu_bo_do_create(adev, size, byte_align, domain,
- parent_flags, type, resv, bo_ptr);
+ bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
+ r = amdgpu_bo_do_create(adev, bp, bo_ptr);
if (r)
return r;
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
- if (!resv)
+ if (!bp->resv)
WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
NULL));
- r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
+ r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr));
- if (!resv)
+ if (!bp->resv)
reservation_object_unlock((*bo_ptr)->tbo.resv);
if (r)
@@ -689,8 +693,21 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
/* A shared bo cannot be migrated to VRAM */
- if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM))
- return -EINVAL;
+ if (bo->prime_shared_count) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ else
+ return -EINVAL;
+ }
+
+ /* This assumes only APU display buffers are pinned with (VRAM|GTT).
+ * See function amdgpu_display_supported_domains()
+ */
+ if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+ domain = AMDGPU_GEM_DOMAIN_VRAM;
+ if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ }
if (bo->pin_count) {
uint32_t mem_type = bo->tbo.mem.mem_type;
@@ -838,6 +855,13 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
return amdgpu_ttm_init(adev);
}
+int amdgpu_bo_late_init(struct amdgpu_device *adev)
+{
+ amdgpu_ttm_late_init(adev);
+
+ return 0;
+}
+
void amdgpu_bo_fini(struct amdgpu_device *adev)
{
amdgpu_ttm_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 546f77cb7882..540e03fa159f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -33,6 +33,16 @@
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
+struct amdgpu_bo_param {
+ unsigned long size;
+ int byte_align;
+ u32 domain;
+ u32 preferred_domain;
+ u64 flags;
+ enum ttm_bo_type type;
+ struct reservation_object *resv;
+};
+
/* bo virtual addresses in a vm */
struct amdgpu_bo_va_mapping {
struct amdgpu_bo_va *bo_va;
@@ -196,6 +206,27 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
}
/**
+ * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
+ */
+static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
+ struct drm_mm_node *node = bo->tbo.mem.mm_node;
+ unsigned long pages_left;
+
+ if (bo->tbo.mem.mem_type != TTM_PL_VRAM)
+ return false;
+
+ for (pages_left = bo->tbo.mem.num_pages; pages_left;
+ pages_left -= node->size, node++)
+ if (node->start < fpfn)
+ return true;
+
+ return false;
+}
+
+/**
* amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
*/
static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
@@ -203,10 +234,8 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
}
-int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
- int byte_align, u32 domain,
- u64 flags, enum ttm_bo_type type,
- struct reservation_object *resv,
+int amdgpu_bo_create(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr);
int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
unsigned long size, int align,
@@ -230,6 +259,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
int amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
int amdgpu_bo_init(struct amdgpu_device *adev);
+int amdgpu_bo_late_init(struct amdgpu_device *adev);
void amdgpu_bo_fini(struct amdgpu_device *adev);
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 361975cf45a9..b455da487782 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -77,6 +77,37 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
}
}
+/**
+ * DOC: power_dpm_state
+ *
+ * This is a legacy interface and is only provided for backwards compatibility.
+ * The amdgpu driver provides a sysfs API for adjusting certain power
+ * related parameters. The file power_dpm_state is used for this.
+ * It accepts the following arguments:
+ * - battery
+ * - balanced
+ * - performance
+ *
+ * battery
+ *
+ * On older GPUs, the vbios provided a special power state for battery
+ * operation. Selecting battery switched to this state. This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ * balanced
+ *
+ * On older GPUs, the vbios provided a special power state for balanced
+ * operation. Selecting balanced switched to this state. This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ * performance
+ *
+ * On older GPUs, the vbios provided a special power state for performance
+ * operation. Selecting performance switched to this state. This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ */
+
static ssize_t amdgpu_get_dpm_state(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -131,6 +162,59 @@ fail:
return count;
}
+
+/**
+ * DOC: power_dpm_force_performance_level
+ *
+ * The amdgpu driver provides a sysfs API for adjusting certain power
+ * related parameters. The file power_dpm_force_performance_level is
+ * used for this. It accepts the following arguments:
+ * - auto
+ * - low
+ * - high
+ * - manual
+ * - GPU fan
+ * - profile_standard
+ * - profile_min_sclk
+ * - profile_min_mclk
+ * - profile_peak
+ *
+ * auto
+ *
+ * When auto is selected, the driver will attempt to dynamically select
+ * the optimal power profile for current conditions in the driver.
+ *
+ * low
+ *
+ * When low is selected, the clocks are forced to the lowest power state.
+ *
+ * high
+ *
+ * When high is selected, the clocks are forced to the highest power state.
+ *
+ * manual
+ *
+ * When manual is selected, the user can manually adjust which power states
+ * are enabled for each clock domain via the sysfs pp_dpm_mclk, pp_dpm_sclk,
+ * and pp_dpm_pcie files and adjust the power state transition heuristics
+ * via the pp_power_profile_mode sysfs file.
+ *
+ * profile_standard
+ * profile_min_sclk
+ * profile_min_mclk
+ * profile_peak
+ *
+ * When the profiling modes are selected, clock and power gating are
+ * disabled and the clocks are set for different profiling cases. This
+ * mode is recommended for profiling specific work loads where you do
+ * not want clock or power gating for clock fluctuation to interfere
+ * with your results. profile_standard sets the clocks to a fixed clock
+ * level which varies from asic to asic. profile_min_sclk forces the sclk
+ * to the lowest level. profile_min_mclk forces the mclk to the lowest level.
+ * profile_peak sets all clocks (mclk, sclk, pcie) to the highest levels.
+ *
+ */
+
static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -324,6 +408,17 @@ fail:
return count;
}
+/**
+ * DOC: pp_table
+ *
+ * The amdgpu driver provides a sysfs API for uploading new powerplay
+ * tables. The file pp_table is used for this. Reading the file
+ * will dump the current power play table. Writing to the file
+ * will attempt to upload a new powerplay table and re-initialize
+ * powerplay using that new table.
+ *
+ */
+
static ssize_t amdgpu_get_pp_table(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -360,6 +455,29 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
return count;
}
+/**
+ * DOC: pp_od_clk_voltage
+ *
+ * The amdgpu driver provides a sysfs API for adjusting the clocks and voltages
+ * in each power level within a power state. The pp_od_clk_voltage is used for
+ * this.
+ *
+ * Reading the file will display:
+ * - a list of engine clock levels and voltages labeled OD_SCLK
+ * - a list of memory clock levels and voltages labeled OD_MCLK
+ * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE
+ *
+ * To manually adjust these settings, first select manual using
+ * power_dpm_force_performance_level. Enter a new value for each
+ * level by writing a string that contains "s/m level clock voltage" to
+ * the file. E.g., "s 1 500 820" will update sclk level 1 to be 500 MHz
+ * at 820 mV; "m 0 350 810" will update mclk level 0 to be 350 MHz at
+ * 810 mV. When you have edited all of the states as needed, write
+ * "c" (commit) to the file to commit your changes. If you want to reset to the
+ * default power levels, write "r" (reset) to the file to reset them.
+ *
+ */
+
static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
struct device_attribute *attr,
const char *buf,
@@ -437,6 +555,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
if (adev->powerplay.pp_funcs->print_clock_levels) {
size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
+ size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
return size;
} else {
return snprintf(buf, PAGE_SIZE, "\n");
@@ -444,6 +563,23 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
}
+/**
+ * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
+ *
+ * The amdgpu driver provides a sysfs API for adjusting what power levels
+ * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk,
+ * and pp_dpm_pcie are used for this.
+ *
+ * Reading back the files will show you the available power levels within
+ * the power state and the clock information for those levels.
+ *
+ * To manually adjust these states, first select manual using
+ * power_dpm_force_performance_level.
+ * Secondly,Enter a new value for each level by inputing a string that
+ * contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie"
+ * E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6.
+ */
+
static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -466,23 +602,27 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int ret;
long level;
- uint32_t i, mask = 0;
- char sub_str[2];
+ uint32_t mask = 0;
+ char *sub_str = NULL;
+ char *tmp;
+ char buf_cpy[count];
+ const char delimiter[3] = {' ', '\n', '\0'};
- for (i = 0; i < strlen(buf); i++) {
- if (*(buf + i) == '\n')
- continue;
- sub_str[0] = *(buf + i);
- sub_str[1] = '\0';
- ret = kstrtol(sub_str, 0, &level);
+ memcpy(buf_cpy, buf, count+1);
+ tmp = buf_cpy;
+ while (tmp[0]) {
+ sub_str = strsep(&tmp, delimiter);
+ if (strlen(sub_str)) {
+ ret = kstrtol(sub_str, 0, &level);
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- mask |= 1 << level;
+ if (ret) {
+ count = -EINVAL;
+ goto fail;
+ }
+ mask |= 1 << level;
+ } else
+ break;
}
-
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
@@ -512,21 +652,26 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int ret;
long level;
- uint32_t i, mask = 0;
- char sub_str[2];
+ uint32_t mask = 0;
+ char *sub_str = NULL;
+ char *tmp;
+ char buf_cpy[count];
+ const char delimiter[3] = {' ', '\n', '\0'};
- for (i = 0; i < strlen(buf); i++) {
- if (*(buf + i) == '\n')
- continue;
- sub_str[0] = *(buf + i);
- sub_str[1] = '\0';
- ret = kstrtol(sub_str, 0, &level);
+ memcpy(buf_cpy, buf, count+1);
+ tmp = buf_cpy;
+ while (tmp[0]) {
+ sub_str = strsep(&tmp, delimiter);
+ if (strlen(sub_str)) {
+ ret = kstrtol(sub_str, 0, &level);
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- mask |= 1 << level;
+ if (ret) {
+ count = -EINVAL;
+ goto fail;
+ }
+ mask |= 1 << level;
+ } else
+ break;
}
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
@@ -557,21 +702,27 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int ret;
long level;
- uint32_t i, mask = 0;
- char sub_str[2];
+ uint32_t mask = 0;
+ char *sub_str = NULL;
+ char *tmp;
+ char buf_cpy[count];
+ const char delimiter[3] = {' ', '\n', '\0'};
- for (i = 0; i < strlen(buf); i++) {
- if (*(buf + i) == '\n')
- continue;
- sub_str[0] = *(buf + i);
- sub_str[1] = '\0';
- ret = kstrtol(sub_str, 0, &level);
+ memcpy(buf_cpy, buf, count+1);
+ tmp = buf_cpy;
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- mask |= 1 << level;
+ while (tmp[0]) {
+ sub_str = strsep(&tmp, delimiter);
+ if (strlen(sub_str)) {
+ ret = kstrtol(sub_str, 0, &level);
+
+ if (ret) {
+ count = -EINVAL;
+ goto fail;
+ }
+ mask |= 1 << level;
+ } else
+ break;
}
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
@@ -668,6 +819,26 @@ fail:
return count;
}
+/**
+ * DOC: pp_power_profile_mode
+ *
+ * The amdgpu driver provides a sysfs API for adjusting the heuristics
+ * related to switching between power levels in a power state. The file
+ * pp_power_profile_mode is used for this.
+ *
+ * Reading this file outputs a list of all of the predefined power profiles
+ * and the relevant heuristics settings for that profile.
+ *
+ * To select a profile or create a custom profile, first select manual using
+ * power_dpm_force_performance_level. Writing the number of a predefined
+ * profile to pp_power_profile_mode will enable those heuristics. To
+ * create a custom set of heuristics, write a string of numbers to the file
+ * starting with the number of the custom profile along with a setting
+ * for each heuristic parameter. Due to differences across asic families
+ * the heuristic parameters vary from family to family.
+ *
+ */
+
static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1020,8 +1191,8 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
- struct pp_gpu_power query = {0};
- int r, size = sizeof(query);
+ u32 query = 0;
+ int r, size = sizeof(u32);
unsigned uw;
/* Can't get power when the card is off */
@@ -1041,7 +1212,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
return r;
/* convert to microwatts */
- uw = (query.average_gpu_power >> 8) * 1000000;
+ uw = (query >> 8) * 1000000 + (query & 0xff) * 1000;
return snprintf(buf, PAGE_SIZE, "%u\n", uw);
}
@@ -1109,6 +1280,46 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
return count;
}
+
+/**
+ * DOC: hwmon
+ *
+ * The amdgpu driver exposes the following sensor interfaces:
+ * - GPU temperature (via the on-die sensor)
+ * - GPU voltage
+ * - Northbridge voltage (APUs only)
+ * - GPU power
+ * - GPU fan
+ *
+ * hwmon interfaces for GPU temperature:
+ * - temp1_input: the on die GPU temperature in millidegrees Celsius
+ * - temp1_crit: temperature critical max value in millidegrees Celsius
+ * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ *
+ * hwmon interfaces for GPU voltage:
+ * - in0_input: the voltage on the GPU in millivolts
+ * - in1_input: the voltage on the Northbridge in millivolts
+ *
+ * hwmon interfaces for GPU power:
+ * - power1_average: average power used by the GPU in microWatts
+ * - power1_cap_min: minimum cap supported in microWatts
+ * - power1_cap_max: maximum cap supported in microWatts
+ * - power1_cap: selected power cap in microWatts
+ *
+ * hwmon interfaces for GPU fan:
+ * - pwm1: pulse width modulation fan level (0-255)
+ * - pwm1_enable: pulse width modulation fan control method
+ * 0: no fan speed control
+ * 1: manual fan speed control using pwm interface
+ * 2: automatic fan speed control
+ * - pwm1_min: pulse width modulation fan control minimum level (0)
+ * - pwm1_max: pulse width modulation fan control maximum level (255)
+ * - fan1_input: fan speed in RPM
+ *
+ * You can use hwmon tools like sensors to view this information on your system.
+ *
+ */
+
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
@@ -1153,19 +1364,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
struct amdgpu_device *adev = dev_get_drvdata(dev);
umode_t effective_mode = attr->mode;
- /* handle non-powerplay limitations */
- if (!adev->powerplay.pp_handle) {
- /* Skip fan attributes if fan is not present */
- if (adev->pm.no_fan &&
- (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
- attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
- attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
- attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
- return 0;
- /* requires powerplay */
- if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
- return 0;
- }
+
+ /* Skip fan attributes if fan is not present */
+ if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_input.dev_attr.attr))
+ return 0;
/* Skip limit attributes if DPM is not enabled */
if (!adev->pm.dpm_enabled &&
@@ -1658,9 +1864,6 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
{
- struct drm_device *ddev = adev->ddev;
- struct drm_crtc *crtc;
- struct amdgpu_crtc *amdgpu_crtc;
int i = 0;
if (!adev->pm.dpm_enabled)
@@ -1676,21 +1879,25 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
}
if (adev->powerplay.pp_funcs->dispatch_tasks) {
+ if (!amdgpu_device_has_dc_support(adev)) {
+ mutex_lock(&adev->pm.mutex);
+ amdgpu_dpm_get_active_displays(adev);
+ adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtcs;
+ adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev);
+ adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev);
+ /* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */
+ if (adev->pm.pm_display_cfg.vrefresh > 120)
+ adev->pm.pm_display_cfg.min_vblank_time = 0;
+ if (adev->powerplay.pp_funcs->display_configuration_change)
+ adev->powerplay.pp_funcs->display_configuration_change(
+ adev->powerplay.pp_handle,
+ &adev->pm.pm_display_cfg);
+ mutex_unlock(&adev->pm.mutex);
+ }
amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL);
} else {
mutex_lock(&adev->pm.mutex);
- adev->pm.dpm.new_active_crtcs = 0;
- adev->pm.dpm.new_active_crtc_count = 0;
- if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
- list_for_each_entry(crtc,
- &ddev->mode_config.crtc_list, head) {
- amdgpu_crtc = to_amdgpu_crtc(crtc);
- if (amdgpu_crtc->enabled) {
- adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
- adev->pm.dpm.new_active_crtc_count++;
- }
- }
- }
+ amdgpu_dpm_get_active_displays(adev);
/* update battery/ac status */
if (power_supply_is_system_supplied() > 0)
adev->pm.dpm.ac_power = true;
@@ -1711,7 +1918,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev)
{
uint32_t value;
- struct pp_gpu_power query = {0};
+ uint32_t query = 0;
int size;
/* sanity check PP is enabled */
@@ -1734,17 +1941,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
seq_printf(m, "\t%u mV (VDDGFX)\n", value);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
seq_printf(m, "\t%u mV (VDDNB)\n", value);
- size = sizeof(query);
- if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) {
- seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8,
- query.vddc_power & 0xff);
- seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8,
- query.vddci_power & 0xff);
- seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8,
- query.max_gpu_power & 0xff);
- seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8,
- query.average_gpu_power & 0xff);
- }
+ size = sizeof(uint32_t);
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size))
+ seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff);
size = sizeof(value);
seq_printf(m, "\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 4b584cb75bf4..4683626b065f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -102,12 +102,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct reservation_object *resv = attach->dmabuf->resv;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *bo;
+ struct amdgpu_bo_param bp;
int ret;
+ memset(&bp, 0, sizeof(bp));
+ bp.size = attach->dmabuf->size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+ bp.flags = 0;
+ bp.type = ttm_bo_type_sg;
+ bp.resv = resv;
ww_mutex_lock(&resv->lock, NULL);
- ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg,
- resv, &bo);
+ ret = amdgpu_bo_create(adev, &bp, &bo);
if (ret)
goto error;
@@ -209,7 +215,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { true, false };
- u32 domain = amdgpu_display_framebuffer_domains(adev);
+ u32 domain = amdgpu_display_supported_domains(adev);
int ret;
bool reads = (direction == DMA_BIDIRECTIONAL ||
direction == DMA_FROM_DEVICE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index c7d43e064fc7..9f1a5bd39ae8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -52,6 +52,7 @@ static int psp_sw_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
psp_v3_1_set_psp_funcs(psp);
break;
case CHIP_RAVEN:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 262c1267249e..8af16e81c7d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -66,6 +66,8 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
u32 ring,
struct amdgpu_ring **out_ring)
{
+ u32 instance;
+
switch (mapper->hw_ip) {
case AMDGPU_HW_IP_GFX:
*out_ring = &adev->gfx.gfx_ring[ring];
@@ -77,13 +79,16 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
*out_ring = &adev->sdma.instance[ring].ring;
break;
case AMDGPU_HW_IP_UVD:
- *out_ring = &adev->uvd.ring;
+ instance = ring;
+ *out_ring = &adev->uvd.inst[instance].ring;
break;
case AMDGPU_HW_IP_VCE:
*out_ring = &adev->vce.ring[ring];
break;
case AMDGPU_HW_IP_UVD_ENC:
- *out_ring = &adev->uvd.ring_enc[ring];
+ instance = ring / adev->uvd.num_enc_rings;
+ *out_ring =
+ &adev->uvd.inst[instance].ring_enc[ring%adev->uvd.num_enc_rings];
break;
case AMDGPU_HW_IP_VCN_DEC:
*out_ring = &adev->vcn.ring_dec;
@@ -240,13 +245,14 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
ip_num_rings = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
- ip_num_rings = 1;
+ ip_num_rings = adev->uvd.num_uvd_inst;
break;
case AMDGPU_HW_IP_VCE:
ip_num_rings = adev->vce.num_rings;
break;
case AMDGPU_HW_IP_UVD_ENC:
- ip_num_rings = adev->uvd.num_enc_rings;
+ ip_num_rings =
+ adev->uvd.num_enc_rings * adev->uvd.num_uvd_inst;
break;
case AMDGPU_HW_IP_VCN_DEC:
ip_num_rings = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d5f526f38e50..c6850b629d0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -362,6 +362,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = NULL;
+ ring->me = 0;
ring->adev->rings[ring->idx] = NULL;
}
@@ -459,6 +460,26 @@ void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
spin_unlock(&adev->ring_lru_list_lock);
}
+/**
+ * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
+ *
+ * @adev: amdgpu_device pointer
+ * @reg0: register to write
+ * @reg1: register to wait on
+ * @ref: reference value to write/wait on
+ * @mask: mask to wait on
+ *
+ * Helper for rings that don't support write and wait in a
+ * single oneshot packet.
+ */
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
/*
* Debugfs info
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1a5911882657..1513124c5659 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -29,7 +29,7 @@
#include <drm/drm_print.h>
/* max number of rings */
-#define AMDGPU_MAX_RINGS 18
+#define AMDGPU_MAX_RINGS 21
#define AMDGPU_MAX_GFX_RINGS 1
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
@@ -42,6 +42,7 @@
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
+#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
enum amdgpu_ring_type {
AMDGPU_RING_TYPE_GFX,
@@ -90,7 +91,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
unsigned irq_type);
void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
+ unsigned flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
void amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
@@ -154,6 +156,9 @@ struct amdgpu_ring_funcs {
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask);
+ void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask);
void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
/* priority functions */
void (*set_priority) (struct amdgpu_ring *ring,
@@ -228,6 +233,10 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
int *blacklist, int num_blacklist,
bool lru_pipe_order, struct amdgpu_ring **ring);
void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t val0,
+ uint32_t reg1, uint32_t val1);
+
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
int i = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 2dbe87591f81..d167e8ab76d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -33,6 +33,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_bo *vram_obj = NULL;
struct amdgpu_bo **gtt_obj = NULL;
+ struct amdgpu_bo_param bp;
uint64_t gart_addr, vram_addr;
unsigned n, size;
int i, r;
@@ -58,9 +59,15 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
r = 1;
goto out_cleanup;
}
-
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0,
- ttm_bo_type_kernel, NULL, &vram_obj);
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = 0;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+
+ r = amdgpu_bo_create(adev, &bp, &vram_obj);
if (r) {
DRM_ERROR("Failed to create VRAM object\n");
goto out_cleanup;
@@ -79,9 +86,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
void **vram_start, **vram_end;
struct dma_fence *fence = NULL;
- r = amdgpu_bo_create(adev, size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT, 0,
- ttm_bo_type_kernel, NULL, gtt_obj + i);
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ r = amdgpu_bo_create(adev, &bp, gtt_obj + i);
if (r) {
DRM_ERROR("Failed to create GTT object %d\n", i);
goto out_lclean;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 532263ab6e16..e96e26d3f3b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -275,7 +275,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
),
TP_fast_assign(
- __entry->bo = bo_va->base.bo;
+ __entry->bo = bo_va ? bo_va->base.bo : NULL;
__entry->start = mapping->start;
__entry->last = mapping->last;
__entry->offset = mapping->offset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 205da3ff9cd0..e93a0a237dc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -63,16 +63,44 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
/*
* Global memory.
*/
+
+/**
+ * amdgpu_ttm_mem_global_init - Initialize and acquire reference to
+ * memory object
+ *
+ * @ref: Object for initialization.
+ *
+ * This is called by drm_global_item_ref() when an object is being
+ * initialized.
+ */
static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
{
return ttm_mem_global_init(ref->object);
}
+/**
+ * amdgpu_ttm_mem_global_release - Drop reference to a memory object
+ *
+ * @ref: Object being removed
+ *
+ * This is called by drm_global_item_unref() when an object is being
+ * released.
+ */
static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
{
ttm_mem_global_release(ref->object);
}
+/**
+ * amdgpu_ttm_global_init - Initialize global TTM memory reference
+ * structures.
+ *
+ * @adev: AMDGPU device for which the global structures need to be
+ * registered.
+ *
+ * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
+ * during bring up.
+ */
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
struct drm_global_reference *global_ref;
@@ -80,7 +108,9 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
struct drm_sched_rq *rq;
int r;
+ /* ensure reference is false in case init fails */
adev->mman.mem_global_referenced = false;
+
global_ref = &adev->mman.mem_global_ref;
global_ref->global_type = DRM_GLOBAL_TTM_MEM;
global_ref->size = sizeof(struct ttm_mem_global);
@@ -111,7 +141,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
ring = adev->mman.buffer_funcs_ring;
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
- rq, amdgpu_sched_jobs, NULL);
+ rq, NULL);
if (r) {
DRM_ERROR("Failed setting up TTM BO move run queue.\n");
goto error_entity;
@@ -146,6 +176,18 @@ static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
return 0;
}
+/**
+ * amdgpu_init_mem_type - Initialize a memory manager for a specific
+ * type of memory request.
+ *
+ * @bdev: The TTM BO device object (contains a reference to
+ * amdgpu_device)
+ * @type: The type of memory requested
+ * @man:
+ *
+ * This is called by ttm_bo_init_mm() when a buffer object is being
+ * initialized.
+ */
static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
struct ttm_mem_type_manager *man)
{
@@ -161,6 +203,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
man->default_caching = TTM_PL_FLAG_CACHED;
break;
case TTM_PL_TT:
+ /* GTT memory */
man->func = &amdgpu_gtt_mgr_func;
man->gpu_offset = adev->gmc.gart_start;
man->available_caching = TTM_PL_MASK_CACHING;
@@ -193,6 +236,14 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
return 0;
}
+/**
+ * amdgpu_evict_flags - Compute placement flags
+ *
+ * @bo: The buffer object to evict
+ * @placement: Possible destination(s) for evicted BO
+ *
+ * Fill in placement data when ttm_bo_evict() is called
+ */
static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
struct ttm_placement *placement)
{
@@ -204,12 +255,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
};
+ /* Don't handle scatter gather BOs */
if (bo->type == ttm_bo_type_sg) {
placement->num_placement = 0;
placement->num_busy_placement = 0;
return;
}
+ /* Object isn't an AMDGPU object so ignore */
if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
placement->busy_placement = &placements;
@@ -217,26 +270,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
placement->num_busy_placement = 1;
return;
}
+
abo = ttm_to_amdgpu_bo(bo);
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
if (!adev->mman.buffer_funcs_enabled) {
+ /* Move to system memory */
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
} else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
- !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
- unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
- struct drm_mm_node *node = bo->mem.mm_node;
- unsigned long pages_left;
-
- for (pages_left = bo->mem.num_pages;
- pages_left;
- pages_left -= node->size, node++) {
- if (node->start < fpfn)
- break;
- }
-
- if (!pages_left)
- goto gtt;
+ !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
+ amdgpu_bo_in_cpu_visible_vram(abo)) {
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
@@ -245,12 +288,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
*/
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT);
- abo->placements[0].fpfn = fpfn;
+ abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
abo->placements[0].lpfn = 0;
abo->placement.busy_placement = &abo->placements[1];
abo->placement.num_busy_placement = 1;
} else {
-gtt:
+ /* Move to GTT memory */
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
}
break;
@@ -261,6 +304,15 @@ gtt:
*placement = abo->placement;
}
+/**
+ * amdgpu_verify_access - Verify access for a mmap call
+ *
+ * @bo: The buffer object to map
+ * @filp: The file pointer from the process performing the mmap
+ *
+ * This is called by ttm_bo_mmap() to verify whether a process
+ * has the right to mmap a BO to their process space.
+ */
static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
@@ -278,6 +330,15 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
filp->private_data);
}
+/**
+ * amdgpu_move_null - Register memory for a buffer object
+ *
+ * @bo: The bo to assign the memory to
+ * @new_mem: The memory to be assigned.
+ *
+ * Assign the memory from new_mem to the memory of the buffer object
+ * bo.
+ */
static void amdgpu_move_null(struct ttm_buffer_object *bo,
struct ttm_mem_reg *new_mem)
{
@@ -288,6 +349,10 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,
new_mem->mm_node = NULL;
}
+/**
+ * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT
+ * buffer.
+ */
static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
struct drm_mm_node *mm_node,
struct ttm_mem_reg *mem)
@@ -302,9 +367,10 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
}
/**
- * amdgpu_find_mm_node - Helper function finds the drm_mm_node
- * corresponding to @offset. It also modifies the offset to be
- * within the drm_mm_node returned
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ * corresponding to @offset. It also modifies
+ * the offset to be within the drm_mm_node
+ * returned
*/
static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
unsigned long *offset)
@@ -443,7 +509,12 @@ error:
return r;
}
-
+/**
+ * amdgpu_move_blit - Copy an entire buffer to another buffer
+ *
+ * This is a helper called by amdgpu_bo_move() and
+ * amdgpu_move_vram_ram() to help move buffers to and from VRAM.
+ */
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
bool evict, bool no_wait_gpu,
struct ttm_mem_reg *new_mem,
@@ -478,6 +549,11 @@ error:
return r;
}
+/**
+ * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer
+ *
+ * Called by amdgpu_bo_move().
+ */
static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
@@ -490,6 +566,8 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
int r;
adev = amdgpu_ttm_adev(bo->bdev);
+
+ /* create space/pages for new_mem in GTT space */
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
placement.num_placement = 1;
@@ -504,25 +582,36 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
return r;
}
+ /* set caching flags */
r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
if (unlikely(r)) {
goto out_cleanup;
}
+ /* Bind the memory to the GTT space */
r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
if (unlikely(r)) {
goto out_cleanup;
}
+
+ /* blit VRAM to GTT */
r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
+
+ /* move BO (in tmp_mem) to new_mem */
r = ttm_bo_move_ttm(bo, ctx, new_mem);
out_cleanup:
ttm_bo_mem_put(bo, &tmp_mem);
return r;
}
+/**
+ * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM
+ *
+ * Called by amdgpu_bo_move().
+ */
static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
@@ -535,6 +624,8 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
int r;
adev = amdgpu_ttm_adev(bo->bdev);
+
+ /* make space in GTT for old_mem buffer */
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
placement.num_placement = 1;
@@ -548,10 +639,14 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
if (unlikely(r)) {
return r;
}
+
+ /* move/bind old memory to GTT space */
r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
if (unlikely(r)) {
goto out_cleanup;
}
+
+ /* copy to VRAM */
r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
@@ -561,6 +656,11 @@ out_cleanup:
return r;
}
+/**
+ * amdgpu_bo_move - Move a buffer object to a new memory location
+ *
+ * Called by ttm_bo_handle_move_mem()
+ */
static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
@@ -626,6 +726,11 @@ memcpy:
return 0;
}
+/**
+ * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
+ *
+ * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
+ */
static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
@@ -695,7 +800,7 @@ struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
u64 offset;
uint64_t userptr;
- struct mm_struct *usermm;
+ struct task_struct *usertask;
uint32_t userflags;
spinlock_t guptasklock;
struct list_head guptasks;
@@ -703,17 +808,29 @@ struct amdgpu_ttm_tt {
uint32_t last_set_pages;
};
+/**
+ * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to
+ * by a USERPTR pointer to memory
+ *
+ * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
+ * This provides a wrapper around the get_user_pages() call to provide
+ * device accessible pages that back user memory.
+ */
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct mm_struct *mm = gtt->usertask->mm;
unsigned int flags = 0;
unsigned pinned = 0;
int r;
+ if (!mm) /* Happens during process shutdown */
+ return -ESRCH;
+
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
flags |= FOLL_WRITE;
- down_read(&current->mm->mmap_sem);
+ down_read(&mm->mmap_sem);
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
/* check that we only use anonymous memory
@@ -721,13 +838,14 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma;
- vma = find_vma(gtt->usermm, gtt->userptr);
+ vma = find_vma(mm, gtt->userptr);
if (!vma || vma->vm_file || vma->vm_end < end) {
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return -EPERM;
}
}
+ /* loop enough times using contiguous pages of memory */
do {
unsigned num_pages = ttm->num_pages - pinned;
uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
@@ -739,7 +857,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
list_add(&guptask.list, &gtt->guptasks);
spin_unlock(&gtt->guptasklock);
- r = get_user_pages(userptr, num_pages, flags, p, NULL);
+ if (mm == current->mm)
+ r = get_user_pages(userptr, num_pages, flags, p, NULL);
+ else
+ r = get_user_pages_remote(gtt->usertask,
+ mm, userptr, num_pages,
+ flags, p, NULL, NULL);
spin_lock(&gtt->guptasklock);
list_del(&guptask.list);
@@ -752,15 +875,23 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
} while (pinned < ttm->num_pages);
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return 0;
release_pages:
release_pages(pages, pinned);
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return r;
}
+/**
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages
+ * as necessary.
+ *
+ * Called by amdgpu_cs_list_validate(). This creates the page list
+ * that backs user memory and will ultimately be mapped into the device
+ * address space.
+ */
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -775,6 +906,11 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
}
}
+/**
+ * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
+ *
+ * Called while unpinning userptr pages
+ */
void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -793,7 +929,12 @@ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
}
}
-/* prepare the sg table with the user pages */
+/**
+ * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the
+ * user pages
+ *
+ * Called by amdgpu_ttm_backend_bind()
+ **/
static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -805,17 +946,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ /* Allocate an SG array and squash pages into it */
r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
ttm->num_pages << PAGE_SHIFT,
GFP_KERNEL);
if (r)
goto release_sg;
+ /* Map SG to device */
r = -ENOMEM;
nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
if (nents != ttm->sg->nents)
goto release_sg;
+ /* convert SG to linear array of pages and dma addresses */
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
gtt->ttm.dma_address, ttm->num_pages);
@@ -826,6 +970,9 @@ release_sg:
return r;
}
+/**
+ * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
+ */
static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -839,14 +986,60 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
if (!ttm->sg->sgl)
return;
- /* free the sg table and pages again */
+ /* unmap the pages mapped to the device */
dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
+ /* mark the pages as dirty */
amdgpu_ttm_tt_mark_user_pages(ttm);
sg_free_table(ttm->sg);
}
+int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+ struct ttm_buffer_object *tbo,
+ uint64_t flags)
+{
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
+ struct ttm_tt *ttm = tbo->ttm;
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ int r;
+
+ if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
+ uint64_t page_idx = 1;
+
+ r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
+ ttm->pages, gtt->ttm.dma_address, flags);
+ if (r)
+ goto gart_bind_fail;
+
+ /* Patch mtype of the second part BO */
+ flags &= ~AMDGPU_PTE_MTYPE_MASK;
+ flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
+
+ r = amdgpu_gart_bind(adev,
+ gtt->offset + (page_idx << PAGE_SHIFT),
+ ttm->num_pages - page_idx,
+ &ttm->pages[page_idx],
+ &(gtt->ttm.dma_address[page_idx]), flags);
+ } else {
+ r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+ ttm->pages, gtt->ttm.dma_address, flags);
+ }
+
+gart_bind_fail:
+ if (r)
+ DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+ ttm->num_pages, gtt->offset);
+
+ return r;
+}
+
+/**
+ * amdgpu_ttm_backend_bind - Bind GTT memory
+ *
+ * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
+ * This handles binding GTT memory to the device address space.
+ */
static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
struct ttm_mem_reg *bo_mem)
{
@@ -877,7 +1070,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
return 0;
}
+ /* compute PTE flags relevant to this BO memory */
flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
+
+ /* bind pages into GART page tables */
gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
ttm->pages, gtt->ttm.dma_address, flags);
@@ -888,6 +1084,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
return r;
}
+/**
+ * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object
+ */
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
@@ -903,6 +1102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
return 0;
+ /* allocate GTT space */
tmp = bo->mem;
tmp.mm_node = NULL;
placement.num_placement = 1;
@@ -918,10 +1118,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
if (unlikely(r))
return r;
+ /* compute PTE flags for this buffer object */
flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
+
+ /* Bind pages */
gtt->offset = (u64)tmp.start << PAGE_SHIFT;
- r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages,
- bo->ttm->pages, gtt->ttm.dma_address, flags);
+ r = amdgpu_ttm_gart_bind(adev, bo, flags);
if (unlikely(r)) {
ttm_bo_mem_put(bo, &tmp);
return r;
@@ -935,31 +1137,40 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return 0;
}
+/**
+ * amdgpu_ttm_recover_gart - Rebind GTT pages
+ *
+ * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
+ * rebind GTT pages during a GPU reset.
+ */
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
- struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm;
uint64_t flags;
int r;
- if (!gtt)
+ if (!tbo->ttm)
return 0;
- flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem);
- r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
- gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags);
- if (r)
- DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
- gtt->ttm.ttm.num_pages, gtt->offset);
+ flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
+ r = amdgpu_ttm_gart_bind(adev, tbo, flags);
+
return r;
}
+/**
+ * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
+ *
+ * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
+ * ttm_tt_destroy().
+ */
static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
int r;
+ /* if the pages have userptr pinning then clear that first */
if (gtt->userptr)
amdgpu_ttm_tt_unpin_userptr(ttm);
@@ -978,6 +1189,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+
ttm_dma_tt_fini(&gtt->ttm);
kfree(gtt);
}
@@ -988,6 +1202,13 @@ static struct ttm_backend_func amdgpu_backend_func = {
.destroy = &amdgpu_ttm_backend_destroy,
};
+/**
+ * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
+ *
+ * @bo: The buffer object to create a GTT ttm_tt object around
+ *
+ * Called by ttm_tt_create().
+ */
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
@@ -1001,6 +1222,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
return NULL;
}
gtt->ttm.ttm.func = &amdgpu_backend_func;
+
+ /* allocate space for the uninitialized page entries */
if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
kfree(gtt);
return NULL;
@@ -1008,6 +1231,12 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
return &gtt->ttm.ttm;
}
+/**
+ * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
+ *
+ * Map the pages of a ttm_tt object to an address space visible
+ * to the underlying device.
+ */
static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
struct ttm_operation_ctx *ctx)
{
@@ -1015,6 +1244,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
struct amdgpu_ttm_tt *gtt = (void *)ttm;
bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+ /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
if (gtt && gtt->userptr) {
ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
if (!ttm->sg)
@@ -1039,9 +1269,17 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
}
#endif
+ /* fall back to generic helper to populate the page array
+ * and map them to the device */
return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
}
+/**
+ * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
+ *
+ * Unmaps pages of a ttm_tt object from the device address space and
+ * unpopulates the page array backing it.
+ */
static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
struct amdgpu_device *adev;
@@ -1067,9 +1305,21 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
}
#endif
+ /* fall back to generic helper to unmap and unpopulate array */
ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
}
+/**
+ * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt
+ * for the current task
+ *
+ * @ttm: The ttm_tt object to bind this userptr object to
+ * @addr: The address in the current tasks VM space to use
+ * @flags: Requirements of userptr object.
+ *
+ * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
+ * to current task
+ */
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
uint32_t flags)
{
@@ -1079,8 +1329,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
return -EINVAL;
gtt->userptr = addr;
- gtt->usermm = current->mm;
gtt->userflags = flags;
+
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+ gtt->usertask = current->group_leader;
+ get_task_struct(gtt->usertask);
+
spin_lock_init(&gtt->guptasklock);
INIT_LIST_HEAD(&gtt->guptasks);
atomic_set(&gtt->mmu_invalidations, 0);
@@ -1089,6 +1344,9 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
return 0;
}
+/**
+ * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
+ */
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1096,9 +1354,18 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
if (gtt == NULL)
return NULL;
- return gtt->usermm;
+ if (gtt->usertask == NULL)
+ return NULL;
+
+ return gtt->usertask->mm;
}
+/**
+ * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays
+ * inside an address range for the
+ * current task.
+ *
+ */
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end)
{
@@ -1109,10 +1376,16 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
if (gtt == NULL || !gtt->userptr)
return false;
+ /* Return false if no part of the ttm_tt object lies within
+ * the range
+ */
size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
+ /* Search the lists of tasks that hold this mapping and see
+ * if current is one of them. If it is return false.
+ */
spin_lock(&gtt->guptasklock);
list_for_each_entry(entry, &gtt->guptasks, list) {
if (entry->task == current) {
@@ -1127,6 +1400,10 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
return true;
}
+/**
+ * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been
+ * invalidated?
+ */
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated)
{
@@ -1137,6 +1414,12 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
return prev_invalidated != *last_invalidated;
}
+/**
+ * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this
+ * ttm_tt object been invalidated
+ * since the last time they've
+ * been set?
+ */
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1147,6 +1430,9 @@ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
}
+/**
+ * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
+ */
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1157,6 +1443,12 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}
+/**
+ * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ *
+ * @ttm: The ttm_tt object to compute the flags for
+ * @mem: The memory registry backing this ttm_tt object
+ */
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem)
{
@@ -1181,6 +1473,16 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
return flags;
}
+/**
+ * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict
+ * a buffer object.
+ *
+ * Return true if eviction is sensible. Called by
+ * ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space()
+ * which tries to evict buffer objects until it can find space
+ * for a new object and by ttm_bo_force_list_clean() which is
+ * used to clean out a memory space.
+ */
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
const struct ttm_place *place)
{
@@ -1227,6 +1529,19 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
return ttm_bo_eviction_valuable(bo, place);
}
+/**
+ * amdgpu_ttm_access_memory - Read or Write memory that backs a
+ * buffer object.
+ *
+ * @bo: The buffer object to read/write
+ * @offset: Offset into buffer object
+ * @buf: Secondary buffer to write/read from
+ * @len: Length in bytes of access
+ * @write: true if writing
+ *
+ * This is used to access VRAM that backs a buffer object via MMIO
+ * access for debugging purposes.
+ */
static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
unsigned long offset,
void *buf, int len, int write)
@@ -1329,6 +1644,7 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
{
struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_param bp;
int r = 0;
int i;
u64 vram_size = adev->gmc.visible_vram_size;
@@ -1336,17 +1652,21 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
u64 size = adev->fw_vram_usage.size;
struct amdgpu_bo *bo;
+ memset(&bp, 0, sizeof(bp));
+ bp.size = adev->fw_vram_usage.size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
adev->fw_vram_usage.va = NULL;
adev->fw_vram_usage.reserved_bo = NULL;
if (adev->fw_vram_usage.size > 0 &&
adev->fw_vram_usage.size <= vram_size) {
- r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- ttm_bo_type_kernel, NULL,
+ r = amdgpu_bo_create(adev, &bp,
&adev->fw_vram_usage.reserved_bo);
if (r)
goto error_create;
@@ -1398,13 +1718,22 @@ error_create:
adev->fw_vram_usage.reserved_bo = NULL;
return r;
}
-
+/**
+ * amdgpu_ttm_init - Init the memory management (ttm) as well as
+ * various gtt/vram related fields.
+ *
+ * This initializes all of the memory space pools that the TTM layer
+ * will need such as the GTT space (system memory mapped to the device),
+ * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
+ * can be mapped per VMID.
+ */
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
uint64_t gtt_size;
int r;
u64 vis_vram_limit;
+ /* initialize global references for vram/gtt */
r = amdgpu_ttm_global_init(adev);
if (r) {
return r;
@@ -1425,6 +1754,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* We opt to avoid OOM on system pages allocations */
adev->mman.bdev.no_retry = true;
+ /* Initialize VRAM pool with all of VRAM divided into pages */
r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
adev->gmc.real_vram_size >> PAGE_SHIFT);
if (r) {
@@ -1454,15 +1784,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->stolen_vga_memory,
- NULL, NULL);
- if (r)
- return r;
+ /* allocate memory as required for VGA
+ * This is used for VGA emulation and pre-OS scanout buffers to
+ * avoid display artifacts while transitioning between pre-OS
+ * and driver. */
+ if (adev->gmc.stolen_size) {
+ r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->stolen_vga_memory,
+ NULL, NULL);
+ if (r)
+ return r;
+ }
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
+ /* Compute GTT size, either bsaed on 3/4th the size of RAM size
+ * or whatever the user passed on module init */
if (amdgpu_gtt_size == -1) {
struct sysinfo si;
@@ -1473,6 +1811,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
else
gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+
+ /* Initialize GTT memory pool */
r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing GTT heap.\n");
@@ -1481,6 +1821,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
(unsigned)(gtt_size / (1024 * 1024)));
+ /* Initialize various on-chip memory pools */
adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
@@ -1520,6 +1861,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
}
+ /* Register debugfs entries for amdgpu_ttm */
r = amdgpu_ttm_debugfs_init(adev);
if (r) {
DRM_ERROR("Failed to init debugfs\n");
@@ -1528,13 +1870,25 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_ttm_late_init - Handle any late initialization for
+ * amdgpu_ttm
+ */
+void amdgpu_ttm_late_init(struct amdgpu_device *adev)
+{
+ /* return the VGA stolen memory (if any) back to VRAM */
+ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+}
+
+/**
+ * amdgpu_ttm_fini - De-initialize the TTM memory pools
+ */
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
if (!adev->mman.initialized)
return;
amdgpu_ttm_debugfs_fini(adev);
- amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
amdgpu_ttm_fw_reserve_vram_fini(adev);
if (adev->mman.aper_base_kaddr)
iounmap(adev->mman.aper_base_kaddr);
@@ -1856,6 +2210,11 @@ static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
#endif
};
+/**
+ * amdgpu_ttm_vram_read - Linear read access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -1895,6 +2254,11 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_ttm_vram_write - Linear write access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
@@ -1943,6 +2307,9 @@ static const struct file_operations amdgpu_ttm_vram_fops = {
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
+/**
+ * amdgpu_ttm_gtt_read - Linear read access to GTT memory
+ */
static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -1990,6 +2357,13 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
#endif
+/**
+ * amdgpu_iomem_read - Virtual read access to GPU mapped memory
+ *
+ * This function is used to read memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -1998,6 +2372,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
ssize_t result = 0;
int r;
+ /* retrieve the IOMMU domain if any for this device */
dom = iommu_get_domain_for_dev(adev->dev);
while (size) {
@@ -2010,6 +2385,10 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
bytes = bytes < size ? bytes : size;
+ /* Translate the bus address to a physical address. If
+ * the domain is NULL it means there is no IOMMU active
+ * and the address translation is the identity
+ */
addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
pfn = addr >> PAGE_SHIFT;
@@ -2034,6 +2413,13 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
return result;
}
+/**
+ * amdgpu_iomem_write - Virtual write access to GPU mapped memory
+ *
+ * This function is used to write memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 6ea7de863041..e969c879d87e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -77,6 +77,7 @@ uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
int amdgpu_ttm_init(struct amdgpu_device *adev);
+void amdgpu_ttm_late_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
bool enable);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 5916cc25e28b..f55f72a37ca8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -161,8 +161,38 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
- DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
- le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
+ DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
+ if (version_minor == 1) {
+ const struct rlc_firmware_header_v2_1 *v2_1 =
+ container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+ DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
+ le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length));
+ DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver));
+ DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver));
+ DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
+ le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes));
+ DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes));
+ DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver));
+ DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver));
+ DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
+ le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes));
+ DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes));
+ DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver));
+ DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_srm_feature_ver));
+ DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
+ le32_to_cpu(v2_1->save_restore_list_srm_size_bytes));
+ DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
+ le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes));
+ }
} else {
DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
}
@@ -265,6 +295,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
@@ -276,6 +307,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
+ case CHIP_VEGA20:
+ return AMDGPU_FW_LOAD_DIRECT;
default:
DRM_ERROR("Unknown firmware load type\n");
}
@@ -307,7 +340,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
(ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
- ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) {
+ ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@@ -329,6 +365,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
le32_to_cpu(header->ucode_array_offset_bytes) +
le32_to_cpu(cp_hdr->jt_offset) * 4),
ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
+ memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
+ ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) {
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
+ memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm,
+ ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
+ memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
+ ucode->ucode_size);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 30b5500dc152..08e38579af24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -98,6 +98,24 @@ struct rlc_firmware_header_v2_0 {
uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */
};
+/* version_major=2, version_minor=1 */
+struct rlc_firmware_header_v2_1 {
+ struct rlc_firmware_header_v2_0 v2_0;
+ uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */
+ uint32_t save_restore_list_cntl_ucode_ver;
+ uint32_t save_restore_list_cntl_feature_ver;
+ uint32_t save_restore_list_cntl_size_bytes;
+ uint32_t save_restore_list_cntl_offset_bytes;
+ uint32_t save_restore_list_gpm_ucode_ver;
+ uint32_t save_restore_list_gpm_feature_ver;
+ uint32_t save_restore_list_gpm_size_bytes;
+ uint32_t save_restore_list_gpm_offset_bytes;
+ uint32_t save_restore_list_srm_ucode_ver;
+ uint32_t save_restore_list_srm_feature_ver;
+ uint32_t save_restore_list_srm_size_bytes;
+ uint32_t save_restore_list_srm_offset_bytes;
+};
+
/* version_major=1, version_minor=0 */
struct sdma_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -148,6 +166,7 @@ union amdgpu_firmware_header {
struct gfx_firmware_header_v1_0 gfx;
struct rlc_firmware_header_v1_0 rlc;
struct rlc_firmware_header_v2_0 rlc_v2_0;
+ struct rlc_firmware_header_v2_1 rlc_v2_1;
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct gpu_info_firmware_header_v1_0 gpu_info;
@@ -168,6 +187,9 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_MEC2,
AMDGPU_UCODE_ID_CP_MEC2_JT,
AMDGPU_UCODE_ID_RLC_G,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
AMDGPU_UCODE_ID_UVD,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 627542b22ae4..bcf68f80bbf0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -66,15 +66,18 @@
#define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin"
#define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin"
#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin"
+#define FIRMWARE_VEGAM "amdgpu/vegam_uvd.bin"
#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin"
#define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin"
+#define FIRMWARE_VEGA20 "amdgpu/vega20_uvd.bin"
-#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00)
-#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00)
-#define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00)
-#define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00)
-#define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00)
+/* These are common relative offsets for all asics, from uvd_7_0_offset.h, */
+#define UVD_GPCOM_VCPU_CMD 0x03c3
+#define UVD_GPCOM_VCPU_DATA0 0x03c4
+#define UVD_GPCOM_VCPU_DATA1 0x03c5
+#define UVD_NO_OP 0x03ff
+#define UVD_BASE_SI 0x3800
/**
* amdgpu_uvd_cs_ctx - Command submission parser context
@@ -109,9 +112,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
@@ -123,9 +128,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned version_major, version_minor, family_id;
- int i, r;
+ int i, j, r;
- INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
+ INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler);
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
@@ -172,6 +177,12 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
case CHIP_VEGA12:
fw_name = FIRMWARE_VEGA12;
break;
+ case CHIP_VEGAM:
+ fw_name = FIRMWARE_VEGAM;
+ break;
+ case CHIP_VEGA20:
+ fw_name = FIRMWARE_VEGA20;
+ break;
default:
return -EINVAL;
}
@@ -226,28 +237,30 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
- &adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
- return r;
- }
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
- ring = &adev->uvd.ring;
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity,
- rq, amdgpu_sched_jobs, NULL);
- if (r != 0) {
- DRM_ERROR("Failed setting up UVD run queue.\n");
- return r;
- }
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
+ &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
+ return r;
+ }
- for (i = 0; i < adev->uvd.max_handles; ++i) {
- atomic_set(&adev->uvd.handles[i], 0);
- adev->uvd.filp[i] = NULL;
- }
+ ring = &adev->uvd.inst[j].ring;
+ rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+ r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity,
+ rq, NULL);
+ if (r != 0) {
+ DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j);
+ return r;
+ }
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ atomic_set(&adev->uvd.inst[j].handles[i], 0);
+ adev->uvd.inst[j].filp[i] = NULL;
+ }
+ }
/* from uvd v5.0 HW addressing capacity increased to 64 bits */
if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
adev->uvd.address_64_bit = true;
@@ -274,20 +287,22 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
- int i;
- kfree(adev->uvd.saved_bo);
+ int i, j;
- drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ kfree(adev->uvd.inst[j].saved_bo);
- amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
- &adev->uvd.gpu_addr,
- (void **)&adev->uvd.cpu_addr);
+ drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity);
- amdgpu_ring_fini(&adev->uvd.ring);
+ amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
+ &adev->uvd.inst[j].gpu_addr,
+ (void **)&adev->uvd.inst[j].cpu_addr);
- for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
- amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
+ amdgpu_ring_fini(&adev->uvd.inst[j].ring);
+ for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
+ amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+ }
release_firmware(adev->uvd.fw);
return 0;
@@ -297,32 +312,33 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{
unsigned size;
void *ptr;
- int i;
-
- if (adev->uvd.vcpu_bo == NULL)
- return 0;
+ int i, j;
- cancel_delayed_work_sync(&adev->uvd.idle_work);
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ if (adev->uvd.inst[j].vcpu_bo == NULL)
+ continue;
- /* only valid for physical mode */
- if (adev->asic_type < CHIP_POLARIS10) {
- for (i = 0; i < adev->uvd.max_handles; ++i)
- if (atomic_read(&adev->uvd.handles[i]))
- break;
+ cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work);
- if (i == adev->uvd.max_handles)
- return 0;
- }
+ /* only valid for physical mode */
+ if (adev->asic_type < CHIP_POLARIS10) {
+ for (i = 0; i < adev->uvd.max_handles; ++i)
+ if (atomic_read(&adev->uvd.inst[j].handles[i]))
+ break;
- size = amdgpu_bo_size(adev->uvd.vcpu_bo);
- ptr = adev->uvd.cpu_addr;
+ if (i == adev->uvd.max_handles)
+ continue;
+ }
- adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
- if (!adev->uvd.saved_bo)
- return -ENOMEM;
+ size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
+ ptr = adev->uvd.inst[j].cpu_addr;
- memcpy_fromio(adev->uvd.saved_bo, ptr, size);
+ adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL);
+ if (!adev->uvd.inst[j].saved_bo)
+ return -ENOMEM;
+ memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+ }
return 0;
}
@@ -330,59 +346,65 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
{
unsigned size;
void *ptr;
+ int i;
- if (adev->uvd.vcpu_bo == NULL)
- return -EINVAL;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
- size = amdgpu_bo_size(adev->uvd.vcpu_bo);
- ptr = adev->uvd.cpu_addr;
+ size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
+ ptr = adev->uvd.inst[i].cpu_addr;
- if (adev->uvd.saved_bo != NULL) {
- memcpy_toio(ptr, adev->uvd.saved_bo, size);
- kfree(adev->uvd.saved_bo);
- adev->uvd.saved_bo = NULL;
- } else {
- const struct common_firmware_header *hdr;
- unsigned offset;
-
- hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
- le32_to_cpu(hdr->ucode_size_bytes));
- size -= le32_to_cpu(hdr->ucode_size_bytes);
- ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ if (adev->uvd.inst[i].saved_bo != NULL) {
+ memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
+ kfree(adev->uvd.inst[i].saved_bo);
+ adev->uvd.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned offset;
+
+ hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ }
+ memset_io(ptr, 0, size);
+ /* to restore uvd fence seq */
+ amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
}
- memset_io(ptr, 0, size);
- /* to restore uvd fence seq */
- amdgpu_fence_driver_force_completion(&adev->uvd.ring);
}
-
return 0;
}
void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
- struct amdgpu_ring *ring = &adev->uvd.ring;
- int i, r;
+ struct amdgpu_ring *ring;
+ int i, j, r;
- for (i = 0; i < adev->uvd.max_handles; ++i) {
- uint32_t handle = atomic_read(&adev->uvd.handles[i]);
- if (handle != 0 && adev->uvd.filp[i] == filp) {
- struct dma_fence *fence;
-
- r = amdgpu_uvd_get_destroy_msg(ring, handle,
- false, &fence);
- if (r) {
- DRM_ERROR("Error destroying UVD (%d)!\n", r);
- continue;
- }
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ ring = &adev->uvd.inst[j].ring;
+
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]);
+ if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) {
+ struct dma_fence *fence;
+
+ r = amdgpu_uvd_get_destroy_msg(ring, handle,
+ false, &fence);
+ if (r) {
+ DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r);
+ continue;
+ }
- dma_fence_wait(fence, false);
- dma_fence_put(fence);
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
- adev->uvd.filp[i] = NULL;
- atomic_set(&adev->uvd.handles[i], 0);
+ adev->uvd.inst[j].filp[i] = NULL;
+ atomic_set(&adev->uvd.inst[j].handles[i], 0);
+ }
}
}
}
@@ -657,15 +679,16 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
void *ptr;
long r;
int i;
+ uint32_t ip_instance = ctx->parser->job->ring->me;
if (offset & 0x3F) {
- DRM_ERROR("UVD messages must be 64 byte aligned!\n");
+ DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance);
return -EINVAL;
}
r = amdgpu_bo_kmap(bo, &ptr);
if (r) {
- DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
+ DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r);
return r;
}
@@ -675,7 +698,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
handle = msg[2];
if (handle == 0) {
- DRM_ERROR("Invalid UVD handle!\n");
+ DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance);
return -EINVAL;
}
@@ -686,18 +709,18 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
/* try to alloc a new handle */
for (i = 0; i < adev->uvd.max_handles; ++i) {
- if (atomic_read(&adev->uvd.handles[i]) == handle) {
- DRM_ERROR("Handle 0x%x already in use!\n", handle);
+ if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
+ DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle);
return -EINVAL;
}
- if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
- adev->uvd.filp[i] = ctx->parser->filp;
+ if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) {
+ adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp;
return 0;
}
}
- DRM_ERROR("No more free UVD handles!\n");
+ DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance);
return -ENOSPC;
case 1:
@@ -709,27 +732,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
/* validate the handle */
for (i = 0; i < adev->uvd.max_handles; ++i) {
- if (atomic_read(&adev->uvd.handles[i]) == handle) {
- if (adev->uvd.filp[i] != ctx->parser->filp) {
- DRM_ERROR("UVD handle collision detected!\n");
+ if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
+ if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) {
+ DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance);
return -EINVAL;
}
return 0;
}
}
- DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
+ DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle);
return -ENOENT;
case 2:
/* it's a destroy msg, free the handle */
for (i = 0; i < adev->uvd.max_handles; ++i)
- atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
+ atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0);
amdgpu_bo_kunmap(bo);
return 0;
default:
- DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
+ DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type);
return -EINVAL;
}
BUG();
@@ -800,7 +823,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
}
if ((cmd == 0 || cmd == 0x3) &&
- (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) {
+ (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
start, end);
return -EINVAL;
@@ -968,6 +991,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
uint64_t addr;
long r;
int i;
+ unsigned offset_idx = 0;
+ unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
@@ -987,17 +1012,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
goto err;
if (adev->asic_type >= CHIP_VEGA10) {
- data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0);
- data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0);
- data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0);
- data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0);
- } else {
- data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
- data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
- data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
- data[3] = PACKET0(mmUVD_NO_OP, 0);
+ offset_idx = 1 + ring->me;
+ offset[1] = adev->reg_offset[UVD_HWIP][0][1];
+ offset[2] = adev->reg_offset[UVD_HWIP][1][1];
}
+ data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
+ data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
+ data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
+ data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
+
ib = &job->ibs[0];
addr = amdgpu_bo_gpu_offset(bo);
ib->ptr[0] = data[0];
@@ -1033,7 +1057,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (r)
goto err_free;
- r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
+ r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
if (r)
goto err_free;
@@ -1121,8 +1145,15 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, uvd.idle_work.work);
- unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
+ container_of(work, struct amdgpu_device, uvd.inst->idle_work.work);
+ unsigned fences = 0, i, j;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
+ for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
+ fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
+ }
+ }
if (fences == 0) {
if (adev->pm.dpm_enabled) {
@@ -1136,7 +1167,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
AMD_CG_STATE_GATE);
}
} else {
- schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+ schedule_delayed_work(&adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
}
}
@@ -1148,7 +1179,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
if (amdgpu_sriov_vf(adev))
return;
- set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
+ set_clocks = !cancel_delayed_work_sync(&adev->uvd.inst->idle_work);
if (set_clocks) {
if (adev->pm.dpm_enabled) {
amdgpu_dpm_enable_uvd(adev, true);
@@ -1165,7 +1196,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
{
if (!amdgpu_sriov_vf(ring->adev))
- schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+ schedule_delayed_work(&ring->adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
}
/**
@@ -1179,27 +1210,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence;
long r;
+ uint32_t ip_instance = ring->me;
r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
if (r) {
- DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
goto error;
}
r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
if (r) {
- DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out.\n");
+ DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
r = -ETIMEDOUT;
} else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
} else {
- DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+ DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
r = 0;
}
@@ -1227,7 +1259,7 @@ uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
* necessarily linear. So we need to count
* all non-zero handles.
*/
- if (atomic_read(&adev->uvd.handles[i]))
+ if (atomic_read(&adev->uvd.inst->handles[i]))
used_handles++;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 32ea20b99e53..b1579fba134c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -31,30 +31,37 @@
#define AMDGPU_UVD_SESSION_SIZE (50*1024)
#define AMDGPU_UVD_FIRMWARE_OFFSET 256
+#define AMDGPU_MAX_UVD_INSTANCES 2
+
#define AMDGPU_UVD_FIRMWARE_SIZE(adev) \
(AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \
8) - AMDGPU_UVD_FIRMWARE_OFFSET)
-struct amdgpu_uvd {
+struct amdgpu_uvd_inst {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
- unsigned fw_version;
void *saved_bo;
- unsigned max_handles;
atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
struct delayed_work idle_work;
- const struct firmware *fw; /* UVD firmware */
struct amdgpu_ring ring;
struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
struct amdgpu_irq_src irq;
- bool address_64_bit;
- bool use_ctx_buf;
struct drm_sched_entity entity;
struct drm_sched_entity entity_enc;
uint32_t srbm_soft_reset;
+};
+
+struct amdgpu_uvd {
+ const struct firmware *fw; /* UVD firmware */
+ unsigned fw_version;
+ unsigned max_handles;
unsigned num_enc_rings;
+ uint8_t num_uvd_inst;
+ bool address_64_bit;
+ bool use_ctx_buf;
+ struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES];
};
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index a33804bd3314..23d960ec1cf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -51,11 +51,13 @@
#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin"
#define FIRMWARE_STONEY "amdgpu/stoney_vce.bin"
#define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin"
-#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
-#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
+#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
+#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
+#define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin"
#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin"
#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
+#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin"
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
@@ -71,9 +73,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
MODULE_FIRMWARE(FIRMWARE_VEGA10);
MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
static void amdgpu_vce_idle_work_handler(struct work_struct *work);
@@ -132,12 +136,18 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
case CHIP_POLARIS12:
fw_name = FIRMWARE_POLARIS12;
break;
+ case CHIP_VEGAM:
+ fw_name = FIRMWARE_VEGAM;
+ break;
case CHIP_VEGA10:
fw_name = FIRMWARE_VEGA10;
break;
case CHIP_VEGA12:
fw_name = FIRMWARE_VEGA12;
break;
+ case CHIP_VEGA20:
+ fw_name = FIRMWARE_VEGA20;
+ break;
default:
return -EINVAL;
@@ -181,7 +191,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
ring = &adev->vce.ring[0];
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
r = drm_sched_entity_init(&ring->sched, &adev->vce.entity,
- rq, amdgpu_sched_jobs, NULL);
+ rq, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up VCE run queue.\n");
return r;
@@ -755,6 +765,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
if (r)
goto out;
break;
+
+ case 0x0500000d: /* MV buffer */
+ r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
+ idx + 2, 0, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
+ idx + 7, 0, 0);
+ if (r)
+ goto out;
+ break;
}
idx += len / 4;
@@ -860,6 +882,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
goto out;
break;
+ case 0x0500000d: /* MV buffer */
+ r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
+ idx + 2, *size, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
+ idx + 7, *size / 12, 0);
+ if (r)
+ goto out;
+ break;
+
default:
DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
r = -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 58e495330b38..8851bcdfc260 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -105,7 +105,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
ring = &adev->vcn.ring_dec;
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
- rq, amdgpu_sched_jobs, NULL);
+ rq, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up VCN dec run queue.\n");
return r;
@@ -114,7 +114,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
ring = &adev->vcn.ring_enc[0];
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
- rq, amdgpu_sched_jobs, NULL);
+ rq, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up VCN enc run queue.\n");
return r;
@@ -205,13 +205,18 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, vcn.idle_work.work);
unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
+ unsigned i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
+ }
if (fences == 0) {
- if (adev->pm.dpm_enabled) {
- /* might be used when with pg/cg
+ if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, false);
- */
- }
+ else
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
} else {
schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
@@ -223,9 +228,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
if (set_clocks && adev->pm.dpm_enabled) {
- /* might be used when with pg/cg
- amdgpu_dpm_enable_uvd(adev, true);
- */
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+ else
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 2fd7db891689..181e6afa9847 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -45,6 +45,17 @@
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
+enum engine_status_constants {
+ UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
+ UVD_STATUS__UVD_BUSY = 0x00000004,
+ GB_ADDR_CONFIG_DEFAULT = 0x26010011,
+ UVD_STATUS__IDLE = 0x2,
+ UVD_STATUS__BUSY = 0x5,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1,
+ UVD_STATUS__RBC_BUSY = 0x1,
+};
+
struct amdgpu_vcn {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index da55a78d7380..ccba88cc8c54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -94,6 +94,34 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
};
+static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo)
+{
+ base->vm = vm;
+ base->bo = bo;
+ INIT_LIST_HEAD(&base->bo_list);
+ INIT_LIST_HEAD(&base->vm_status);
+
+ if (!bo)
+ return;
+ list_add_tail(&base->bo_list, &bo->va);
+
+ if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+ return;
+
+ if (bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
+ return;
+
+ /*
+ * we checked all the prerequisites, but it looks like this per vm bo
+ * is currently evicted. add the bo to the evicted list to make sure it
+ * is validated on next vm use to avoid fault.
+ * */
+ list_move_tail(&base->vm_status, &vm->evicted);
+}
+
/**
* amdgpu_vm_level_shift - return the addr shift for each level
*
@@ -196,24 +224,16 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *param)
{
struct ttm_bo_global *glob = adev->mman.bdev.glob;
- int r;
+ struct amdgpu_vm_bo_base *bo_base, *tmp;
+ int r = 0;
- spin_lock(&vm->status_lock);
- while (!list_empty(&vm->evicted)) {
- struct amdgpu_vm_bo_base *bo_base;
- struct amdgpu_bo *bo;
+ list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
+ struct amdgpu_bo *bo = bo_base->bo;
- bo_base = list_first_entry(&vm->evicted,
- struct amdgpu_vm_bo_base,
- vm_status);
- spin_unlock(&vm->status_lock);
-
- bo = bo_base->bo;
- BUG_ON(!bo);
if (bo->parent) {
r = validate(param, bo);
if (r)
- return r;
+ break;
spin_lock(&glob->lru_lock);
ttm_bo_move_to_lru_tail(&bo->tbo);
@@ -222,22 +242,29 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
spin_unlock(&glob->lru_lock);
}
- if (bo->tbo.type == ttm_bo_type_kernel &&
- vm->use_cpu_for_update) {
- r = amdgpu_bo_kmap(bo, NULL);
- if (r)
- return r;
- }
-
- spin_lock(&vm->status_lock);
- if (bo->tbo.type != ttm_bo_type_kernel)
+ if (bo->tbo.type != ttm_bo_type_kernel) {
+ spin_lock(&vm->moved_lock);
list_move(&bo_base->vm_status, &vm->moved);
- else
+ spin_unlock(&vm->moved_lock);
+ } else {
list_move(&bo_base->vm_status, &vm->relocated);
+ }
}
- spin_unlock(&vm->status_lock);
- return 0;
+ spin_lock(&glob->lru_lock);
+ list_for_each_entry(bo_base, &vm->idle, vm_status) {
+ struct amdgpu_bo *bo = bo_base->bo;
+
+ if (!bo->parent)
+ continue;
+
+ ttm_bo_move_to_lru_tail(&bo->tbo);
+ if (bo->shadow)
+ ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
+ }
+ spin_unlock(&glob->lru_lock);
+
+ return r;
}
/**
@@ -249,13 +276,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
*/
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
- bool ready;
-
- spin_lock(&vm->status_lock);
- ready = list_empty(&vm->evicted);
- spin_unlock(&vm->status_lock);
-
- return ready;
+ return list_empty(&vm->evicted);
}
/**
@@ -412,11 +433,16 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
struct amdgpu_bo *pt;
if (!entry->base.bo) {
- r = amdgpu_bo_create(adev,
- amdgpu_vm_bo_size(adev, level),
- AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, flags,
- ttm_bo_type_kernel, resv, &pt);
+ struct amdgpu_bo_param bp;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = amdgpu_vm_bo_size(adev, level);
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = flags;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = resv;
+ r = amdgpu_bo_create(adev, &bp, &pt);
if (r)
return r;
@@ -441,12 +467,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
*/
pt->parent = amdgpu_bo_ref(parent->base.bo);
- entry->base.vm = vm;
- entry->base.bo = pt;
- list_add_tail(&entry->base.bo_list, &pt->va);
- spin_lock(&vm->status_lock);
- list_add(&entry->base.vm_status, &vm->relocated);
- spin_unlock(&vm->status_lock);
+ amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+ list_move(&entry->base.vm_status, &vm->relocated);
}
if (level < AMDGPU_VM_PTB) {
@@ -628,7 +650,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
if (vm_flush_needed || pasid_mapping_needed) {
- r = amdgpu_fence_emit(ring, &fence);
+ r = amdgpu_fence_emit(ring, &fence, 0);
if (r)
return r;
}
@@ -893,10 +915,8 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
if (!entry->base.bo)
continue;
- spin_lock(&vm->status_lock);
- if (list_empty(&entry->base.vm_status))
- list_add(&entry->base.vm_status, &vm->relocated);
- spin_unlock(&vm->status_lock);
+ if (!entry->base.moved)
+ list_move(&entry->base.vm_status, &vm->relocated);
amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
}
}
@@ -926,6 +946,14 @@ restart:
params.adev = adev;
if (vm->use_cpu_for_update) {
+ struct amdgpu_vm_bo_base *bo_base;
+
+ list_for_each_entry(bo_base, &vm->relocated, vm_status) {
+ r = amdgpu_bo_kmap(bo_base->bo, NULL);
+ if (unlikely(r))
+ return r;
+ }
+
r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
if (unlikely(r))
return r;
@@ -941,7 +969,6 @@ restart:
params.func = amdgpu_vm_do_set_ptes;
}
- spin_lock(&vm->status_lock);
while (!list_empty(&vm->relocated)) {
struct amdgpu_vm_bo_base *bo_base, *parent;
struct amdgpu_vm_pt *pt, *entry;
@@ -950,14 +977,12 @@ restart:
bo_base = list_first_entry(&vm->relocated,
struct amdgpu_vm_bo_base,
vm_status);
- list_del_init(&bo_base->vm_status);
- spin_unlock(&vm->status_lock);
+ bo_base->moved = false;
+ list_move(&bo_base->vm_status, &vm->idle);
bo = bo_base->bo->parent;
- if (!bo) {
- spin_lock(&vm->status_lock);
+ if (!bo)
continue;
- }
parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
bo_list);
@@ -966,12 +991,10 @@ restart:
amdgpu_vm_update_pde(&params, vm, pt, entry);
- spin_lock(&vm->status_lock);
if (!vm->use_cpu_for_update &&
(ndw - params.ib->length_dw) < 32)
break;
}
- spin_unlock(&vm->status_lock);
if (vm->use_cpu_for_update) {
/* Flush HDP */
@@ -1074,9 +1097,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
if (entry->huge) {
/* Add the entry to the relocated list to update it. */
entry->huge = false;
- spin_lock(&p->vm->status_lock);
list_move(&entry->base.vm_status, &p->vm->relocated);
- spin_unlock(&p->vm->status_lock);
}
return;
}
@@ -1555,9 +1576,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
amdgpu_asic_flush_hdp(adev, NULL);
}
- spin_lock(&vm->status_lock);
+ spin_lock(&vm->moved_lock);
list_del_init(&bo_va->base.vm_status);
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->moved_lock);
+
+ /* If the BO is not in its preferred location add it back to
+ * the evicted list so that it gets validated again on the
+ * next command submission.
+ */
+ if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+ uint32_t mem_type = bo->tbo.mem.mem_type;
+
+ if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
+ list_add_tail(&bo_va->base.vm_status, &vm->evicted);
+ else
+ list_add(&bo_va->base.vm_status, &vm->idle);
+ }
list_splice_init(&bo_va->invalids, &bo_va->valids);
bo_va->cleared = clear;
@@ -1766,19 +1800,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
+ struct amdgpu_bo_va *bo_va, *tmp;
+ struct list_head moved;
bool clear;
- int r = 0;
-
- spin_lock(&vm->status_lock);
- while (!list_empty(&vm->moved)) {
- struct amdgpu_bo_va *bo_va;
- struct reservation_object *resv;
+ int r;
- bo_va = list_first_entry(&vm->moved,
- struct amdgpu_bo_va, base.vm_status);
- spin_unlock(&vm->status_lock);
+ INIT_LIST_HEAD(&moved);
+ spin_lock(&vm->moved_lock);
+ list_splice_init(&vm->moved, &moved);
+ spin_unlock(&vm->moved_lock);
- resv = bo_va->base.bo->tbo.resv;
+ list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
+ struct reservation_object *resv = bo_va->base.bo->tbo.resv;
/* Per VM BOs never need to bo cleared in the page tables */
if (resv == vm->root.base.bo->tbo.resv)
@@ -1791,17 +1824,19 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
clear = true;
r = amdgpu_vm_bo_update(adev, bo_va, clear);
- if (r)
+ if (r) {
+ spin_lock(&vm->moved_lock);
+ list_splice(&moved, &vm->moved);
+ spin_unlock(&vm->moved_lock);
return r;
+ }
if (!clear && resv != vm->root.base.bo->tbo.resv)
reservation_object_unlock(resv);
- spin_lock(&vm->status_lock);
}
- spin_unlock(&vm->status_lock);
- return r;
+ return 0;
}
/**
@@ -1827,36 +1862,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
if (bo_va == NULL) {
return NULL;
}
- bo_va->base.vm = vm;
- bo_va->base.bo = bo;
- INIT_LIST_HEAD(&bo_va->base.bo_list);
- INIT_LIST_HEAD(&bo_va->base.vm_status);
+ amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
bo_va->ref_count = 1;
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
- if (!bo)
- return bo_va;
-
- list_add_tail(&bo_va->base.bo_list, &bo->va);
-
- if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
- return bo_va;
-
- if (bo->preferred_domains &
- amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
- return bo_va;
-
- /*
- * We checked all the prerequisites, but it looks like this per VM BO
- * is currently evicted. add the BO to the evicted list to make sure it
- * is validated on next VM use to avoid fault.
- * */
- spin_lock(&vm->status_lock);
- list_move_tail(&bo_va->base.vm_status, &vm->evicted);
- spin_unlock(&vm->status_lock);
-
return bo_va;
}
@@ -1884,11 +1895,11 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
if (mapping->flags & AMDGPU_PTE_PRT)
amdgpu_vm_prt_get(adev);
- if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
- spin_lock(&vm->status_lock);
- if (list_empty(&bo_va->base.vm_status))
- list_add(&bo_va->base.vm_status, &vm->moved);
- spin_unlock(&vm->status_lock);
+ if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
+ !bo_va->base.moved) {
+ spin_lock(&vm->moved_lock);
+ list_move(&bo_va->base.vm_status, &vm->moved);
+ spin_unlock(&vm->moved_lock);
}
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
@@ -2198,9 +2209,9 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
list_del(&bo_va->base.bo_list);
- spin_lock(&vm->status_lock);
+ spin_lock(&vm->moved_lock);
list_del(&bo_va->base.vm_status);
- spin_unlock(&vm->status_lock);
+ spin_unlock(&vm->moved_lock);
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
@@ -2234,33 +2245,34 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
{
struct amdgpu_vm_bo_base *bo_base;
+ /* shadow bo doesn't have bo base, its validation needs its parent */
+ if (bo->parent && bo->parent->shadow == bo)
+ bo = bo->parent;
+
list_for_each_entry(bo_base, &bo->va, bo_list) {
struct amdgpu_vm *vm = bo_base->vm;
+ bool was_moved = bo_base->moved;
bo_base->moved = true;
if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
- spin_lock(&bo_base->vm->status_lock);
if (bo->tbo.type == ttm_bo_type_kernel)
list_move(&bo_base->vm_status, &vm->evicted);
else
list_move_tail(&bo_base->vm_status,
&vm->evicted);
- spin_unlock(&bo_base->vm->status_lock);
continue;
}
- if (bo->tbo.type == ttm_bo_type_kernel) {
- spin_lock(&bo_base->vm->status_lock);
- if (list_empty(&bo_base->vm_status))
- list_add(&bo_base->vm_status, &vm->relocated);
- spin_unlock(&bo_base->vm->status_lock);
+ if (was_moved)
continue;
- }
- spin_lock(&bo_base->vm->status_lock);
- if (list_empty(&bo_base->vm_status))
- list_add(&bo_base->vm_status, &vm->moved);
- spin_unlock(&bo_base->vm->status_lock);
+ if (bo->tbo.type == ttm_bo_type_kernel) {
+ list_move(&bo_base->vm_status, &vm->relocated);
+ } else {
+ spin_lock(&bo_base->vm->moved_lock);
+ list_move(&bo_base->vm_status, &vm->moved);
+ spin_unlock(&bo_base->vm->moved_lock);
+ }
}
}
@@ -2355,6 +2367,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, unsigned int pasid)
{
+ struct amdgpu_bo_param bp;
+ struct amdgpu_bo *root;
const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
AMDGPU_VM_PTE_COUNT(adev) * 8);
unsigned ring_instance;
@@ -2367,10 +2381,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->va = RB_ROOT_CACHED;
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL;
- spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->evicted);
INIT_LIST_HEAD(&vm->relocated);
+ spin_lock_init(&vm->moved_lock);
INIT_LIST_HEAD(&vm->moved);
+ INIT_LIST_HEAD(&vm->idle);
INIT_LIST_HEAD(&vm->freed);
/* create scheduler entity for page table updates */
@@ -2380,7 +2395,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
ring = adev->vm_manager.vm_pte_rings[ring_instance];
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
r = drm_sched_entity_init(&ring->sched, &vm->entity,
- rq, amdgpu_sched_jobs, NULL);
+ rq, NULL);
if (r)
return r;
@@ -2409,24 +2424,28 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
flags |= AMDGPU_GEM_CREATE_SHADOW;
size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
- r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
- ttm_bo_type_kernel, NULL, &vm->root.base.bo);
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = align;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = flags;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ r = amdgpu_bo_create(adev, &bp, &root);
if (r)
goto error_free_sched_entity;
- r = amdgpu_bo_reserve(vm->root.base.bo, true);
+ r = amdgpu_bo_reserve(root, true);
if (r)
goto error_free_root;
- r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+ r = amdgpu_vm_clear_bo(adev, vm, root,
adev->vm_manager.root_level,
vm->pte_support_ats);
if (r)
goto error_unreserve;
- vm->root.base.vm = vm;
- list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
- list_add_tail(&vm->root.base.vm_status, &vm->evicted);
+ amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
amdgpu_bo_unreserve(vm->root.base.bo);
if (pasid) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 30f080364c97..061b99a18cb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -75,11 +75,12 @@ struct amdgpu_bo_list_entry;
/* PDE Block Fragment Size for VEGA10 */
#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59)
-/* VEGA10 only */
+
+/* For GFX9 */
#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
-/* For Raven */
+#define AMDGPU_MTYPE_NC 0
#define AMDGPU_MTYPE_CC 2
#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \
@@ -167,9 +168,6 @@ struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
- /* protecting invalidated */
- spinlock_t status_lock;
-
/* BOs who needs a validation */
struct list_head evicted;
@@ -178,6 +176,10 @@ struct amdgpu_vm {
/* BOs moved, but not yet updated in the PT */
struct list_head moved;
+ spinlock_t moved_lock;
+
+ /* All BOs of this VM not currently in the state machine */
+ struct list_head idle;
/* BO mappings freed, but not yet updated in the PT */
struct list_head freed;
@@ -186,9 +188,6 @@ struct amdgpu_vm {
struct amdgpu_vm_pt root;
struct dma_fence *last_update;
- /* protecting freed */
- spinlock_t freed_lock;
-
/* Scheduler entity for page table updates */
struct drm_sched_entity entity;
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 47ef3e6e7178..a266dcf5daed 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -5903,7 +5903,7 @@ static int ci_dpm_init(struct amdgpu_device *adev)
pi->pcie_dpm_key_disabled = 0;
pi->thermal_sclk_dpm_enabled = 0;
- if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
+ if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
pi->caps_sclk_ds = true;
else
pi->caps_sclk_ds = false;
@@ -6255,7 +6255,7 @@ static int ci_dpm_late_init(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!amdgpu_dpm)
+ if (!adev->pm.dpm_enabled)
return 0;
/* init the sysfs and debugfs files late */
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 0df22030e713..8ff4c60d1b59 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1735,6 +1735,12 @@ static void cik_invalidate_hdp(struct amdgpu_device *adev,
}
}
+static bool cik_need_full_reset(struct amdgpu_device *adev)
+{
+ /* change this when we support soft reset */
+ return true;
+}
+
static const struct amdgpu_asic_funcs cik_asic_funcs =
{
.read_disabled_bios = &cik_read_disabled_bios,
@@ -1748,6 +1754,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.get_config_memsize = &cik_get_config_memsize,
.flush_hdp = &cik_flush_hdp,
.invalidate_hdp = &cik_invalidate_hdp,
+ .need_full_reset = &cik_need_full_reset,
};
static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 452f88ea46a2..ada241bfeee9 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1823,7 +1823,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@@ -1842,18 +1841,15 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
- if (atomic) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
+ if (atomic)
target_fb = fb;
- } else {
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+ else
target_fb = crtc->primary->fb;
- }
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
- obj = amdgpu_fb->obj;
+ obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
@@ -2043,8 +2039,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@@ -2526,11 +2521,9 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc)
dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
- struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index a7c1c584a191..a5b96eac3033 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -173,6 +173,7 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
ARRAY_SIZE(polaris11_golden_settings_a11));
break;
case CHIP_POLARIS10:
+ case CHIP_VEGAM:
amdgpu_device_program_register_sequence(adev,
polaris10_golden_settings_a11,
ARRAY_SIZE(polaris10_golden_settings_a11));
@@ -473,6 +474,7 @@ static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev)
num_crtc = 2;
break;
case CHIP_POLARIS10:
+ case CHIP_VEGAM:
num_crtc = 6;
break;
case CHIP_POLARIS11:
@@ -1445,6 +1447,7 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev)
adev->mode_info.audio.num_pins = 7;
break;
case CHIP_POLARIS10:
+ case CHIP_VEGAM:
adev->mode_info.audio.num_pins = 8;
break;
case CHIP_POLARIS11:
@@ -1862,7 +1865,6 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@@ -1881,18 +1883,15 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
- if (atomic) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
+ if (atomic)
target_fb = fb;
- } else {
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+ else
target_fb = crtc->primary->fb;
- }
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
- obj = amdgpu_fb->obj;
+ obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
@@ -2082,8 +2081,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@@ -2253,7 +2251,8 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
if ((adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12)) {
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
struct amdgpu_encoder *amdgpu_encoder =
to_amdgpu_encoder(amdgpu_crtc->encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
@@ -2601,11 +2600,9 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
- struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
@@ -2673,7 +2670,8 @@ static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
if ((adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12)) {
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
struct amdgpu_encoder *amdgpu_encoder =
to_amdgpu_encoder(amdgpu_crtc->encoder);
int encoder_mode =
@@ -2830,6 +2828,7 @@ static int dce_v11_0_early_init(void *handle)
adev->mode_info.num_dig = 9;
break;
case CHIP_POLARIS10:
+ case CHIP_VEGAM:
adev->mode_info.num_hpd = 6;
adev->mode_info.num_dig = 6;
break;
@@ -2949,7 +2948,8 @@ static int dce_v11_0_hw_init(void *handle)
amdgpu_atombios_encoder_init_dig(adev);
if ((adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12)) {
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
amdgpu_atombios_crtc_set_dce_clock(adev, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 9f67b7fd3487..394cc1e8fe20 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -1780,7 +1780,6 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@@ -1798,18 +1797,15 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
- if (atomic) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
+ if (atomic)
target_fb = fb;
- } else {
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+ else
target_fb = crtc->primary->fb;
- }
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
- obj = amdgpu_fb->obj;
+ obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
@@ -1978,8 +1974,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@@ -2414,11 +2409,9 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc)
dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
- struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index f55422cbd77a..c9b9ab8f1b05 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1754,7 +1754,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_framebuffer *amdgpu_fb;
struct drm_framebuffer *target_fb;
struct drm_gem_object *obj;
struct amdgpu_bo *abo;
@@ -1773,18 +1772,15 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
return 0;
}
- if (atomic) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
+ if (atomic)
target_fb = fb;
- } else {
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+ else
target_fb = crtc->primary->fb;
- }
/* If atomic, assume fb object is pinned & idle & fenced and
* just update base pointers
*/
- obj = amdgpu_fb->obj;
+ obj = target_fb->obj[0];
abo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(abo, false);
if (unlikely(r != 0))
@@ -1955,8 +1951,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
if (!atomic && fb && fb != crtc->primary->fb) {
- amdgpu_fb = to_amdgpu_framebuffer(fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0))
return r;
@@ -2430,11 +2425,9 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc)
dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
- struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index b51f05dc9582..dbf2ccd0c744 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -168,11 +168,9 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc)
dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
if (crtc->primary->fb) {
int r;
- struct amdgpu_framebuffer *amdgpu_fb;
struct amdgpu_bo *abo;
- amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
- abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r))
DRM_ERROR("failed to reserve abo before unpin\n");
@@ -329,7 +327,7 @@ static int dce_virtual_get_modes(struct drm_connector *connector)
return 0;
}
-static int dce_virtual_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
return MODE_OK;
@@ -462,8 +460,9 @@ static int dce_virtual_hw_init(void *handle)
break;
case CHIP_CARRIZO:
case CHIP_STONEY:
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_VEGAM:
dce_v11_0_disable_dce(adev);
break;
case CHIP_TOPAZ:
@@ -474,6 +473,7 @@ static int dce_virtual_hw_init(void *handle)
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
break;
default:
DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
new file mode 100644
index 000000000000..9935371db7ce
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v1_7.h"
+
+#include "df/df_1_7_default.h"
+#include "df/df_1_7_offset.h"
+#include "df/df_1_7_sh_mask.h"
+
+static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
+
+static void df_v1_7_init (struct amdgpu_device *adev)
+{
+}
+
+static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+ tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+ } else
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+ mmFabricConfigAccessControl_DEFAULT);
+}
+
+static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
+ tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+ return tmp;
+}
+
+static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+ int fb_channel_number;
+
+ fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+
+ return df_v1_7_channel_number[fb_channel_number];
+}
+
+static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ /* Put DF on broadcast mode */
+ adev->df_funcs->enable_broadcast_mode(adev, true);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ } else {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V1_7_MGCG_DISABLE;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ }
+
+ /* Exit boradcast mode */
+ adev->df_funcs->enable_broadcast_mode(adev, false);
+}
+
+static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
+ u32 *flags)
+{
+ u32 tmp;
+
+ /* AMD_CG_SUPPORT_DF_MGCG */
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY)
+ *flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(DF, 0, DF_CS_AON0_CoherentSlaveModeCtrlA0,
+ ForceParWrRMW, enable);
+}
+
+const struct amdgpu_df_funcs df_v1_7_funcs = {
+ .init = df_v1_7_init,
+ .enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
+ .get_fb_channel_number = df_v1_7_get_fb_channel_number,
+ .get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
+ .update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating,
+ .get_clockgating_state = df_v1_7_get_clockgating_state,
+ .enable_ecc_force_par_wr_rmw = df_v1_7_enable_ecc_force_par_wr_rmw,
+};
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
index 214f370c5efd..74621104c487 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,33 +20,21 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#ifndef PP_SOC15_H
-#define PP_SOC15_H
-#include "soc15_hw_ip.h"
-#include "vega10_ip_offset.h"
+#ifndef __DF_V1_7_H__
+#define __DF_V1_7_H__
-inline static uint32_t soc15_get_register_offset(
- uint32_t hw_id,
- uint32_t inst,
- uint32_t segment,
- uint32_t offset)
+#include "soc15_common.h"
+enum DF_V1_7_MGCG
{
- uint32_t reg = 0;
+ DF_V1_7_MGCG_DISABLE = 0,
+ DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY =1,
+ DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY =2,
+ DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY =13,
+ DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY =14,
+ DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY =15
+};
- if (hw_id == THM_HWID)
- reg = THM_BASE.instance[inst].segment[segment] + offset;
- else if (hw_id == NBIF_HWID)
- reg = NBIF_BASE.instance[inst].segment[segment] + offset;
- else if (hw_id == MP1_HWID)
- reg = MP1_BASE.instance[inst].segment[segment] + offset;
- else if (hw_id == DF_HWID)
- reg = DF_BASE.instance[inst].segment[segment] + offset;
- else if (hw_id == GC_HWID)
- reg = GC_BASE.instance[inst].segment[segment] + offset;
- else if (hw_id == SMUIO_HWID)
- reg = SMUIO_BASE.instance[inst].segment[segment] + offset;
- return reg;
-}
+extern const struct amdgpu_df_funcs df_v1_7_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
new file mode 100644
index 000000000000..60608b3df881
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v3_6.h"
+
+#include "df/df_3_6_default.h"
+#include "df/df_3_6_offset.h"
+#include "df/df_3_6_sh_mask.h"
+
+static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
+ 16, 32, 0, 0, 0, 2, 4, 8};
+
+static void df_v3_6_init(struct amdgpu_device *adev)
+{
+}
+
+static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+ tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+ } else
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+ mmFabricConfigAccessControl_DEFAULT);
+}
+
+static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
+ tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+ return tmp;
+}
+
+static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+ int fb_channel_number;
+
+ fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+ if (fb_channel_number > ARRAY_SIZE(df_v3_6_channel_number))
+ fb_channel_number = 0;
+
+ return df_v3_6_channel_number[fb_channel_number];
+}
+
+static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ /* Put DF on broadcast mode */
+ adev->df_funcs->enable_broadcast_mode(adev, true);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ } else {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_DISABLE;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ }
+
+ /* Exit broadcast mode */
+ adev->df_funcs->enable_broadcast_mode(adev, false);
+}
+
+static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
+ u32 *flags)
+{
+ u32 tmp;
+
+ /* AMD_CG_SUPPORT_DF_MGCG */
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
+ *flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+const struct amdgpu_df_funcs df_v3_6_funcs = {
+ .init = df_v3_6_init,
+ .enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
+ .get_fb_channel_number = df_v3_6_get_fb_channel_number,
+ .get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
+ .update_medium_grain_clock_gating =
+ df_v3_6_update_medium_grain_clock_gating,
+ .get_clockgating_state = df_v3_6_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
new file mode 100644
index 000000000000..e79c58e5efcb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V3_6_H__
+#define __DF_V3_6_H__
+
+#include "soc15_common.h"
+
+enum DF_V3_6_MGCG {
+ DF_V3_6_MGCG_DISABLE = 0,
+ DF_V3_6_MGCG_ENABLE_00_CYCLE_DELAY = 1,
+ DF_V3_6_MGCG_ENABLE_01_CYCLE_DELAY = 2,
+ DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY = 13,
+ DF_V3_6_MGCG_ENABLE_31_CYCLE_DELAY = 14,
+ DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
+};
+
+extern const struct amdgpu_df_funcs df_v3_6_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index e14263fca1c9..818874b13c99 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -125,18 +125,6 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
-
MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
@@ -149,6 +137,18 @@ MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
+
MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
@@ -161,6 +161,13 @@ MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
+MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
+MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vegam_me.bin");
+MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
+MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
+
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
@@ -292,6 +299,37 @@ static const u32 tonga_mgcg_cgcg_init[] =
mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
+static const u32 golden_settings_vegam_a11[] =
+{
+ mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
+ mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
+ mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
+ mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+ mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+ mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
+ mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+ mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
+ mmSQ_CONFIG, 0x07f80000, 0x01180000,
+ mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+ mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
+ mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+ mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
+ mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
+};
+
+static const u32 vegam_golden_common_all[] =
+{
+ mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+ mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
+ mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+ mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+ mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
+};
+
static const u32 golden_settings_polaris11_a11[] =
{
mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
@@ -712,6 +750,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
tonga_golden_common_all,
ARRAY_SIZE(tonga_golden_common_all));
break;
+ case CHIP_VEGAM:
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_vegam_a11,
+ ARRAY_SIZE(golden_settings_vegam_a11));
+ amdgpu_device_program_register_sequence(adev,
+ vegam_golden_common_all,
+ ARRAY_SIZE(vegam_golden_common_all));
+ break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
amdgpu_device_program_register_sequence(adev,
@@ -918,17 +964,20 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
case CHIP_FIJI:
chip_name = "fiji";
break;
- case CHIP_POLARIS11:
- chip_name = "polaris11";
+ case CHIP_STONEY:
+ chip_name = "stoney";
break;
case CHIP_POLARIS10:
chip_name = "polaris10";
break;
+ case CHIP_POLARIS11:
+ chip_name = "polaris11";
+ break;
case CHIP_POLARIS12:
chip_name = "polaris12";
break;
- case CHIP_STONEY:
- chip_name = "stoney";
+ case CHIP_VEGAM:
+ chip_name = "vegam";
break;
default:
BUG();
@@ -1770,6 +1819,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_POLARIS10:
+ case CHIP_VEGAM:
ret = amdgpu_atombios_get_gfx_info(adev);
if (ret)
return ret;
@@ -1957,12 +2007,13 @@ static int gfx_v8_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
switch (adev->asic_type) {
- case CHIP_FIJI:
case CHIP_TONGA:
+ case CHIP_CARRIZO:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
- case CHIP_POLARIS10:
- case CHIP_CARRIZO:
+ case CHIP_VEGAM:
adev->gfx.mec.num_mec = 2;
break;
case CHIP_TOPAZ:
@@ -2323,6 +2374,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
break;
case CHIP_FIJI:
+ case CHIP_VEGAM:
modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
@@ -3504,6 +3556,7 @@ gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
switch (adev->asic_type) {
case CHIP_FIJI:
+ case CHIP_VEGAM:
*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
RB_XSEL2(1) | PKR_MAP(2) |
PKR_XSEL(1) | PKR_YSEL(1) |
@@ -4071,7 +4124,8 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
gfx_v8_0_init_power_gating(adev);
WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
} else if ((adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12)) {
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM)) {
gfx_v8_0_init_csb(adev);
gfx_v8_0_init_save_restore_list(adev);
gfx_v8_0_enable_save_restore_machine(adev);
@@ -4146,7 +4200,8 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
if (adev->asic_type == CHIP_POLARIS11 ||
adev->asic_type == CHIP_POLARIS10 ||
- adev->asic_type == CHIP_POLARIS12) {
+ adev->asic_type == CHIP_POLARIS12 ||
+ adev->asic_type == CHIP_VEGAM) {
tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
tmp &= ~0x3;
WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
@@ -5498,7 +5553,8 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade
bool enable)
{
if ((adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12))
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM))
/* Send msg to SMU via Powerplay */
amdgpu_device_ip_set_powergating_state(adev,
AMD_IP_BLOCK_TYPE_SMC,
@@ -5588,6 +5644,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
else
@@ -6154,6 +6211,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9d39fd5b1822..d7530fdfaad5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -27,6 +27,7 @@
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
+#include "amdgpu_atomfirmware.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
@@ -41,7 +42,6 @@
#define GFX9_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
-#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
#define mmPWR_MISC_CNTL_STATUS 0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
@@ -64,6 +64,13 @@ MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
+MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega20_me.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
+
MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
@@ -73,29 +80,22 @@ MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};
static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
@@ -109,6 +109,20 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800)
};
+static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
+};
+
static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
@@ -185,6 +199,30 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
};
+static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
+{
+ mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+ mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+};
+
+static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
+{
+ mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+ mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+};
+
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
@@ -218,6 +256,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_9_2_1_vg12,
ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
break;
+ case CHIP_VEGA20:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0,
+ ARRAY_SIZE(golden_settings_gc_9_0));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0_vg20,
+ ARRAY_SIZE(golden_settings_gc_9_0_vg20));
+ break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
golden_settings_gc_9_1,
@@ -401,6 +447,27 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
kfree(adev->gfx.rlc.register_list_format);
}
+static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_1 *rlc_hdr;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+ adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+ adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+ adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+ adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+ adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+ adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+ adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+ adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+ adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+ adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+ adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+ adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+ le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+}
+
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
@@ -412,6 +479,8 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
const struct rlc_firmware_header_v2_0 *rlc_hdr;
unsigned int *tmp = NULL;
unsigned int i = 0;
+ uint16_t version_major;
+ uint16_t version_minor;
DRM_DEBUG("\n");
@@ -422,6 +491,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
case CHIP_VEGA12:
chip_name = "vega12";
break;
+ case CHIP_VEGA20:
+ chip_name = "vega20";
+ break;
case CHIP_RAVEN:
chip_name = "raven";
break;
@@ -468,6 +540,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
goto out;
err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2 && version_minor == 1)
+ adev->gfx.rlc.is_rlc_v2_1 = true;
+
adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
adev->gfx.rlc.save_and_restore_offset =
@@ -508,6 +586,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+ if (adev->gfx.rlc.is_rlc_v2_1)
+ gfx_v9_0_init_rlc_ext_microcode(adev);
+
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
if (err)
@@ -566,6 +647,26 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ if (adev->gfx.rlc.is_rlc_v2_1) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+ }
+
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
info->fw = adev->gfx.mec_fw;
@@ -1013,9 +1114,10 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
};
-static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
+static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
+ int err;
adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
@@ -1037,6 +1139,20 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
DRM_INFO("fix gfx.config for vega12\n");
break;
+ case CHIP_VEGA20:
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ gb_addr_config &= ~0xf3e777ff;
+ gb_addr_config |= 0x22014042;
+ /* check vbios table if gpu info is not available */
+ err = amdgpu_atomfirmware_get_gfx_info(adev);
+ if (err)
+ return err;
+ break;
case CHIP_RAVEN:
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
@@ -1086,6 +1202,8 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.gb_addr_config,
GB_ADDR_CONFIG,
PIPE_INTERLEAVE_SIZE));
+
+ return 0;
}
static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
@@ -1319,6 +1437,7 @@ static int gfx_v9_0_sw_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
adev->gfx.mec.num_mec = 2;
break;
@@ -1446,7 +1565,9 @@ static int gfx_v9_0_sw_init(void *handle)
adev->gfx.ce_ram_size = 0x8000;
- gfx_v9_0_gpu_early_init(adev);
+ r = gfx_v9_0_gpu_early_init(adev);
+ if (r)
+ return r;
r = gfx_v9_0_ngg_init(adev);
if (r)
@@ -1600,6 +1721,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
gfx_v9_0_setup_rb(adev);
gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
+ adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -1616,7 +1738,10 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
- tmp = adev->gmc.shared_aperture_start >> 48;
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
}
}
@@ -1708,55 +1833,42 @@ static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
adev->gfx.rlc.clear_state_size);
}
-static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
+static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
int indirect_offset,
int list_size,
int *unique_indirect_regs,
int *unique_indirect_reg_count,
- int max_indirect_reg_count,
int *indirect_start_offsets,
- int *indirect_start_offsets_count,
- int max_indirect_start_offsets_count)
+ int *indirect_start_offsets_count)
{
int idx;
- bool new_entry = true;
for (; indirect_offset < list_size; indirect_offset++) {
+ indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
+ *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
- if (new_entry) {
- new_entry = false;
- indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
- *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
- BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
- }
+ while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
+ indirect_offset += 2;
- if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
- new_entry = true;
- continue;
- }
+ /* look for the matching indice */
+ for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
+ if (unique_indirect_regs[idx] ==
+ register_list_format[indirect_offset] ||
+ !unique_indirect_regs[idx])
+ break;
+ }
- indirect_offset += 2;
+ BUG_ON(idx >= *unique_indirect_reg_count);
- /* look for the matching indice */
- for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
- if (unique_indirect_regs[idx] ==
- register_list_format[indirect_offset])
- break;
- }
+ if (!unique_indirect_regs[idx])
+ unique_indirect_regs[idx] = register_list_format[indirect_offset];
- if (idx >= *unique_indirect_reg_count) {
- unique_indirect_regs[*unique_indirect_reg_count] =
- register_list_format[indirect_offset];
- idx = *unique_indirect_reg_count;
- *unique_indirect_reg_count = *unique_indirect_reg_count + 1;
- BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
+ indirect_offset++;
}
-
- register_list_format[indirect_offset] = idx;
}
}
-static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
+static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
{
int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
int unique_indirect_reg_count = 0;
@@ -1765,7 +1877,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
int indirect_start_offsets_count = 0;
int list_size = 0;
- int i = 0;
+ int i = 0, j = 0;
u32 tmp = 0;
u32 *register_list_format =
@@ -1776,15 +1888,14 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
adev->gfx.rlc.reg_list_format_size_bytes);
/* setup unique_indirect_regs array and indirect_start_offsets array */
- gfx_v9_0_parse_ind_reg_list(register_list_format,
- GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
- adev->gfx.rlc.reg_list_format_size_bytes >> 2,
- unique_indirect_regs,
- &unique_indirect_reg_count,
- ARRAY_SIZE(unique_indirect_regs),
- indirect_start_offsets,
- &indirect_start_offsets_count,
- ARRAY_SIZE(indirect_start_offsets));
+ unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
+ gfx_v9_1_parse_ind_reg_list(register_list_format,
+ adev->gfx.rlc.reg_list_format_direct_reg_list_length,
+ adev->gfx.rlc.reg_list_format_size_bytes >> 2,
+ unique_indirect_regs,
+ &unique_indirect_reg_count,
+ indirect_start_offsets,
+ &indirect_start_offsets_count);
/* enable auto inc in case it is disabled */
tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
@@ -1798,19 +1909,37 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
adev->gfx.rlc.register_restore[i]);
- /* load direct register */
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
- for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
- adev->gfx.rlc.register_restore[i]);
-
/* load indirect register */
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
adev->gfx.rlc.reg_list_format_start);
- for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
+
+ /* direct register portion */
+ for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
register_list_format[i]);
+ /* indirect register portion */
+ while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
+ if (register_list_format[i] == 0xFFFFFFFF) {
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+ continue;
+ }
+
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+
+ for (j = 0; j < unique_indirect_reg_count; j++) {
+ if (register_list_format[i] == unique_indirect_regs[j]) {
+ WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
+ break;
+ }
+ }
+
+ BUG_ON(j >= unique_indirect_reg_count);
+
+ i++;
+ }
+
/* set save/restore list size */
list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
list_size = list_size >> 1;
@@ -1823,14 +1952,19 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
adev->gfx.rlc.starting_offsets_start);
for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
- indirect_start_offsets[i]);
+ indirect_start_offsets[i]);
/* load unique indirect regs*/
for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
- unique_indirect_regs[i] & 0x3FFFF);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
- unique_indirect_regs[i] >> 20);
+ if (unique_indirect_regs[i] != 0) {
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
+ + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
+ unique_indirect_regs[i] & 0x3FFFF);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
+ + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
+ unique_indirect_regs[i] >> 20);
+ }
}
kfree(register_list_format);
@@ -2010,6 +2144,9 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad
static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
{
+ if (!adev->gfx.rlc.is_rlc_v2_1)
+ return;
+
if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_GFX_SMG |
AMD_PG_SUPPORT_GFX_DMG |
@@ -2017,27 +2154,12 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
AMD_PG_SUPPORT_GDS |
AMD_PG_SUPPORT_RLC_SMU_HS)) {
gfx_v9_0_init_csb(adev);
- gfx_v9_0_init_rlc_save_restore_list(adev);
+ gfx_v9_1_init_rlc_save_restore_list(adev);
gfx_v9_0_enable_save_restore_machine(adev);
- if (adev->asic_type == CHIP_RAVEN) {
- WREG32(mmRLC_JUMP_TABLE_RESTORE,
- adev->gfx.rlc.cp_table_gpu_addr >> 8);
- gfx_v9_0_init_gfx_power_gating(adev);
-
- if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
- gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
- gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
- } else {
- gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
- gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
- }
-
- if (adev->pg_flags & AMD_PG_SUPPORT_CP)
- gfx_v9_0_enable_cp_power_gating(adev, true);
- else
- gfx_v9_0_enable_cp_power_gating(adev, false);
- }
+ WREG32(mmRLC_JUMP_TABLE_RESTORE,
+ adev->gfx.rlc.cp_table_gpu_addr >> 8);
+ gfx_v9_0_init_gfx_power_gating(adev);
}
}
@@ -3061,6 +3183,9 @@ static int gfx_v9_0_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+ AMD_PG_STATE_UNGATE);
+
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -3279,6 +3404,11 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
+ r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+ AMD_PG_STATE_GATE);
+ if (r)
+ return r;
+
return 0;
}
@@ -3339,8 +3469,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
bool enable)
{
- /* TODO: double check if we need to perform under safe mdoe */
- /* gfx_v9_0_enter_rlc_safe_mode(adev); */
+ gfx_v9_0_enter_rlc_safe_mode(adev);
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@@ -3351,7 +3480,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
- /* gfx_v9_0_exit_rlc_safe_mode(adev); */
+ gfx_v9_0_exit_rlc_safe_mode(adev);
}
static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@ -3605,6 +3734,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
gfx_v9_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
@@ -3742,7 +3872,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
}
amdgpu_ring_write(ring, header);
-BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
(2 << 0) |
@@ -3774,13 +3904,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
- amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
- EOP_TC_ACTION_EN |
- EOP_TC_WB_ACTION_EN |
- EOP_TC_MD_ACTION_EN |
+ amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
+ EOP_TC_NC_ACTION_EN) :
+ (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EOP_TC_MD_ACTION_EN)) |
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
EVENT_INDEX(5)));
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -4137,6 +4270,20 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
+static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ if (amdgpu_sriov_vf(ring->adev))
+ gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+ else
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@@ -4458,6 +4605,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_tmz = gfx_v9_0_ring_emit_tmz,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -4492,6 +4640,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.set_priority = gfx_v9_0_ring_set_priority_compute,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -4522,6 +4671,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.emit_rreg = gfx_v9_0_ring_emit_rreg,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -4577,6 +4727,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
break;
@@ -4686,6 +4837,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 5617cf62c566..79f9ac29019b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -819,12 +819,33 @@ static int gmc_v6_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_bo_late_init(adev);
+
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
+static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+ unsigned size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+ } else {
+ u32 viewport = RREG32(mmVIEWPORT_SIZE);
+ size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ }
+ /* return 0 if the pre-OS buffer uses up most of vram */
+ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+ return 0;
+ return size;
+}
+
static int gmc_v6_0_sw_init(void *handle)
{
int r;
@@ -851,8 +872,6 @@ static int gmc_v6_0_sw_init(void *handle)
adev->gmc.mc_mask = 0xffffffffffULL;
- adev->gmc.stolen_size = 256 * 1024;
-
adev->need_dma32 = false;
dma_bits = adev->need_dma32 ? 32 : 40;
r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
@@ -878,6 +897,8 @@ static int gmc_v6_0_sw_init(void *handle)
if (r)
return r;
+ adev->gmc.stolen_size = gmc_v6_0_get_vbios_fb_size(adev);
+
r = amdgpu_bo_init(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 80054f36e487..7147bfe25a23 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -958,12 +958,33 @@ static int gmc_v7_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_bo_late_init(adev);
+
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
+static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+ unsigned size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+ } else {
+ u32 viewport = RREG32(mmVIEWPORT_SIZE);
+ size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ }
+ /* return 0 if the pre-OS buffer uses up most of vram */
+ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+ return 0;
+ return size;
+}
+
static int gmc_v7_0_sw_init(void *handle)
{
int r;
@@ -998,8 +1019,6 @@ static int gmc_v7_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
- adev->gmc.stolen_size = 256 * 1024;
-
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
* IGP - can handle 40-bits
@@ -1030,6 +1049,8 @@ static int gmc_v7_0_sw_init(void *handle)
if (r)
return r;
+ adev->gmc.stolen_size = gmc_v7_0_get_vbios_fb_size(adev);
+
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index d71d4cb68f9c..1edbe6b477b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -138,6 +138,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
amdgpu_device_program_register_sequence(adev,
golden_settings_polaris11_a11,
ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -231,6 +232,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
case CHIP_FIJI:
case CHIP_CARRIZO:
case CHIP_STONEY:
+ case CHIP_VEGAM:
return 0;
default: BUG();
}
@@ -567,9 +569,10 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
/* set the gart size */
if (amdgpu_gart_size == -1) {
switch (adev->asic_type) {
- case CHIP_POLARIS11: /* all engines support GPUVM */
case CHIP_POLARIS10: /* all engines support GPUVM */
+ case CHIP_POLARIS11: /* all engines support GPUVM */
case CHIP_POLARIS12: /* all engines support GPUVM */
+ case CHIP_VEGAM: /* all engines support GPUVM */
default:
adev->gmc.gart_size = 256ULL << 20;
break;
@@ -1049,12 +1052,33 @@ static int gmc_v8_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_bo_late_init(adev);
+
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
+static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+ unsigned size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+ } else {
+ u32 viewport = RREG32(mmVIEWPORT_SIZE);
+ size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ }
+ /* return 0 if the pre-OS buffer uses up most of vram */
+ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+ return 0;
+ return size;
+}
+
#define mmMC_SEQ_MISC0_FIJI 0xA71
static int gmc_v8_0_sw_init(void *handle)
@@ -1068,7 +1092,8 @@ static int gmc_v8_0_sw_init(void *handle)
} else {
u32 tmp;
- if (adev->asic_type == CHIP_FIJI)
+ if ((adev->asic_type == CHIP_FIJI) ||
+ (adev->asic_type == CHIP_VEGAM))
tmp = RREG32(mmMC_SEQ_MISC0_FIJI);
else
tmp = RREG32(mmMC_SEQ_MISC0);
@@ -1096,8 +1121,6 @@ static int gmc_v8_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
- adev->gmc.stolen_size = 256 * 1024;
-
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
* IGP - can handle 40-bits
@@ -1128,6 +1151,8 @@ static int gmc_v8_0_sw_init(void *handle)
if (r)
return r;
+ adev->gmc.stolen_size = gmc_v8_0_get_vbios_fb_size(adev);
+
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e687363900bb..3c0a85d4e4ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -43,19 +43,13 @@
#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
-#define mmDF_CS_AON0_DramBaseAddress0 0x0044
-#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0
-//DF_CS_AON0_DramBaseAddress0
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L
+/* add these here since we already include dce12 headers and these are for DCN */
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L
/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
#define AMDGPU_NUM_OF_VMIDS 8
@@ -385,11 +379,9 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
upper_32_bits(pd_addr));
- amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
- /* wait for the invalidate to complete */
- amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
- 1 << vmid, 1 << vmid);
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+ hub->vm_inv_eng0_ack + eng,
+ req, 1 << vmid);
return pd_addr;
}
@@ -556,8 +548,7 @@ static int gmc_v9_0_early_init(void *handle)
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
- adev->gmc.private_aperture_start =
- adev->gmc.shared_aperture_end + 1;
+ adev->gmc.private_aperture_start = 0x1000000000000000ULL;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
@@ -659,6 +650,11 @@ static int gmc_v9_0_late_init(void *handle)
unsigned i;
int r;
+ /*
+ * TODO - Uncomment once GART corruption issue is fixed.
+ */
+ /* amdgpu_bo_late_init(adev); */
+
for(i = 0; i < adev->num_rings; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
unsigned vmhub = ring->funcs->vmhub;
@@ -679,6 +675,7 @@ static int gmc_v9_0_late_init(void *handle)
DRM_INFO("ECC is active.\n");
} else if (r == 0) {
DRM_INFO("ECC is not present.\n");
+ adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false);
} else {
DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r);
return r;
@@ -697,10 +694,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
amdgpu_device_vram_location(adev, &adev->gmc, base);
amdgpu_device_gart_location(adev, mc);
/* base offset of vram pages */
- if (adev->flags & AMD_IS_APU)
- adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
- else
- adev->vm_manager.vram_base_offset = 0;
+ adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
}
/**
@@ -714,7 +708,6 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
- u32 tmp;
int chansize, numchan;
int r;
@@ -727,39 +720,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
else
chansize = 128;
- tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
- tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
- tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
- switch (tmp) {
- case 0:
- default:
- numchan = 1;
- break;
- case 1:
- numchan = 2;
- break;
- case 2:
- numchan = 0;
- break;
- case 3:
- numchan = 4;
- break;
- case 4:
- numchan = 0;
- break;
- case 5:
- numchan = 8;
- break;
- case 6:
- numchan = 0;
- break;
- case 7:
- numchan = 16;
- break;
- case 8:
- numchan = 2;
- break;
- }
+ numchan = adev->df_funcs->get_hbm_channel_number(adev);
adev->gmc.vram_width = numchan * chansize;
}
@@ -792,6 +753,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10: /* all engines support GPUVM */
case CHIP_VEGA12: /* all engines support GPUVM */
+ case CHIP_VEGA20:
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@@ -826,6 +788,52 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
return amdgpu_gart_table_vram_alloc(adev);
}
+static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+#if 0
+ u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
+#endif
+ unsigned size;
+
+ /*
+ * TODO Remove once GART corruption is resolved
+ * Check related code in gmc_v9_0_sw_fini
+ * */
+ size = 9 * 1024 * 1024;
+
+#if 0
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+ } else {
+ u32 viewport;
+
+ switch (adev->asic_type) {
+ case CHIP_RAVEN:
+ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
+ 4);
+ break;
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ default:
+ viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
+ size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+ 4);
+ break;
+ }
+ }
+ /* return 0 if the pre-OS buffer uses up most of vram */
+ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+ return 0;
+
+#endif
+ return size;
+}
+
static int gmc_v9_0_sw_init(void *handle)
{
int r;
@@ -851,6 +859,7 @@ static int gmc_v9_0_sw_init(void *handle)
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size of Vega10,
@@ -877,12 +886,6 @@ static int gmc_v9_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
- /*
- * It needs to reserve 8M stolen memory for vega10
- * TODO: Figure out how to avoid that...
- */
- adev->gmc.stolen_size = 8 * 1024 * 1024;
-
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 44-bits.
* IGP - can handle 44-bits
@@ -907,6 +910,8 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
+ adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev);
+
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
@@ -950,6 +955,18 @@ static int gmc_v9_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
gmc_v9_0_gart_fini(adev);
+
+ /*
+ * TODO:
+ * Currently there is a bug where some memory client outside
+ * of the driver writes to first 8M of VRAM on S3 resume,
+ * this overrides GART which by default gets placed in first 8M and
+ * causes VM_FAULTS once GTT is accessed.
+ * Keep the stolen memory reservation until the while this is not solved.
+ * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
+ */
+ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+
amdgpu_bo_fini(adev);
return 0;
@@ -960,6 +977,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
+ case CHIP_VEGA20:
soc15_program_register_sequence(adev,
golden_settings_mmhub_1_0_0,
ARRAY_SIZE(golden_settings_mmhub_1_0_0));
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 26ba984ab2b7..17f7f074cedc 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -2817,7 +2817,7 @@ static int kv_dpm_init(struct amdgpu_device *adev)
pi->caps_tcp_ramping = true;
}
- if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
+ if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
pi->caps_sclk_ds = true;
else
pi->caps_sclk_ds = false;
@@ -2974,7 +2974,7 @@ static int kv_dpm_late_init(void *handle)
/* powerdown unused blocks for now */
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!amdgpu_dpm)
+ if (!adev->pm.dpm_enabled)
return 0;
kv_dpm_powergate_acp(adev, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 43f925773b57..3d53c4413f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -734,6 +734,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
mmhub_v1_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 493348672475..078f70faedcb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -260,8 +260,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
} while (timeout > 1);
flr_done:
- if (locked)
+ if (locked) {
+ adev->in_gpu_reset = 0;
mutex_unlock(&adev->lock_reset);
+ }
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_lockup_timeout == 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index df34dc79d444..365517c0121e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -34,10 +34,19 @@
#define smnCPM_CONTROL 0x11180460
#define smnPCIE_CNTL2 0x11180070
+/* vega20 */
+#define mmRCC_DEV0_EPF0_STRAP0_VG20 0x0011
+#define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX 2
+
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+ if (adev->asic_type == CHIP_VEGA20)
+ tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20);
+ else
+ tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+
tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
@@ -75,10 +84,14 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
u32 doorbell_range = RREG32(reg);
+ u32 range = 2;
+
+ if (adev->asic_type == CHIP_VEGA20)
+ range = 8;
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
- doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range);
} else
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
@@ -133,6 +146,9 @@ static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
+ if (adev->asic_type == CHIP_VEGA20)
+ return;
+
/* NBIF_MGCG_CTRL_LCLK */
def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 8da6da90b1c9..0cf48d26c676 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -40,11 +40,20 @@ enum psp_gfx_crtl_cmd_id
GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */
GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */
GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialized the rings */
+ GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */
+ GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */
+ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */
GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */
};
+/*-----------------------------------------------------------------------------
+ NOTE: All physical addresses used in this interface are actually
+ GPU Virtual Addresses.
+*/
+
+
/* Control registers of the TEE Gfx interface. These are located in
* SRBM-to-PSP mailbox registers (total 8 registers).
*/
@@ -55,8 +64,8 @@ struct psp_gfx_ctrl
volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */
volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */
volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */
- volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of physical address of ring buffer */
- volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of physical address of ring buffer */
+ volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of GPU Virtual of ring buffer (VMID=0)*/
+ volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of GPU Virtual of ring buffer (VMID=0) */
volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */
};
@@ -78,6 +87,8 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */
GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */
GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */
+ GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */
+ GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
};
@@ -85,11 +96,11 @@ enum psp_gfx_cmd_id
/* Command to load Trusted Application binary into PSP OS. */
struct psp_gfx_cmd_load_ta
{
- uint32_t app_phy_addr_lo; /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */
- uint32_t app_phy_addr_hi; /* bits [63:32] of the physical address of the TA binary */
+ uint32_t app_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of the TA binary (must be 4 KB aligned) */
+ uint32_t app_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of the TA binary */
uint32_t app_len; /* length of the TA binary in bytes */
- uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */
- uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the physical address of CMD buffer */
+ uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of CMD buffer (must be 4 KB aligned) */
+ uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of CMD buffer */
uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */
/* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided
@@ -111,8 +122,8 @@ struct psp_gfx_cmd_unload_ta
*/
struct psp_gfx_buf_desc
{
- uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */
- uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of the buffer */
+ uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of the buffer (must be 4 KB aligned) */
+ uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of the buffer */
uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */
};
@@ -145,8 +156,8 @@ struct psp_gfx_cmd_invoke_cmd
/* Command to setup TMR region. */
struct psp_gfx_cmd_setup_tmr
{
- uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */
- uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of TMR buffer */
+ uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */
+ uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */
uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */
};
@@ -174,18 +185,32 @@ enum psp_gfx_fw_type
GFX_FW_TYPE_ISP = 16,
GFX_FW_TYPE_ACP = 17,
GFX_FW_TYPE_SMU = 18,
+ GFX_FW_TYPE_MMSCH = 19,
+ GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20,
+ GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21,
+ GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22,
+ GFX_FW_TYPE_MAX = 23
};
/* Command to load HW IP FW. */
struct psp_gfx_cmd_load_ip_fw
{
- uint32_t fw_phy_addr_lo; /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */
- uint32_t fw_phy_addr_hi; /* bits [63:32] of physical address of FW location */
+ uint32_t fw_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */
+ uint32_t fw_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */
uint32_t fw_size; /* FW buffer size in bytes */
enum psp_gfx_fw_type fw_type; /* FW type */
};
+/* Command to save/restore HW IP FW. */
+struct psp_gfx_cmd_save_restore_ip_fw
+{
+ uint32_t save_fw; /* if set, command is used for saving fw otherwise for resetoring*/
+ uint32_t save_restore_addr_lo; /* bits [31:0] of FB address of GART memory used as save/restore buffer (must be 4 KB aligned) */
+ uint32_t save_restore_addr_hi; /* bits [63:32] of FB address of GART memory used as save/restore buffer */
+ uint32_t buf_size; /* Size of the save/restore buffer in bytes */
+ enum psp_gfx_fw_type fw_type; /* FW type */
+};
/* All GFX ring buffer commands. */
union psp_gfx_commands
@@ -195,7 +220,7 @@ union psp_gfx_commands
struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd;
struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
-
+ struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
};
@@ -226,8 +251,8 @@ struct psp_gfx_cmd_resp
/* These fields are used for RBI only. They are all 0 in GPCOM commands
*/
- uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */
- uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of physical address of response buffer */
+ uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of GPU Virtual address of response buffer (must be 4 KB aligned) */
+ uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of GPU Virtual address of response buffer */
uint32_t resp_offset; /* +20 offset within response buffer */
uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */
@@ -251,19 +276,19 @@ struct psp_gfx_cmd_resp
/* Structure of the Ring Buffer Frame */
struct psp_gfx_rb_frame
{
- uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of physical address of command buffer (must be 4 KB aligned) */
- uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of physical address of command buffer */
+ uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of GPU Virtual address of command buffer (must be 4 KB aligned) */
+ uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of GPU Virtual address of command buffer */
uint32_t cmd_buf_size; /* +8 command buffer size in bytes */
- uint32_t fence_addr_lo; /* +12 bits [31:0] of physical address of Fence for this frame */
- uint32_t fence_addr_hi; /* +16 bits [63:32] of physical address of Fence for this frame */
+ uint32_t fence_addr_lo; /* +12 bits [31:0] of GPU Virtual address of Fence for this frame */
+ uint32_t fence_addr_hi; /* +16 bits [63:32] of GPU Virtual address of Fence for this frame */
uint32_t fence_value; /* +20 Fence value */
uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */
uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */
uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */
uint8_t frame_type; /* +33 1: destory context frame, 0: all other frames; used only for RBI frames */
uint8_t reserved1[2]; /* +34 reserved, must be 0 */
- uint32_t reserved2[7]; /* +40 reserved, must be 0 */
- /* total 64 bytes */
+ uint32_t reserved2[7]; /* +36 reserved, must be 0 */
+ /* total 64 bytes */
};
#endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 8873d833a7f7..0ff136d02d9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -70,6 +70,15 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *
case AMDGPU_UCODE_ID_RLC_G:
*type = GFX_FW_TYPE_RLC_G;
break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
+ break;
case AMDGPU_UCODE_ID_SMC:
*type = GFX_FW_TYPE_SMU;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 196e75def1f2..0c768e388ace 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -41,6 +41,9 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
+
#define smnMP1_FIRMWARE_FLAGS 0x3010028
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index be20a387d961..aa9ab299fd32 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -62,6 +62,8 @@ MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin");
MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin");
MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin");
MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vegam_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vegam_sdma1.bin");
static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
@@ -209,6 +211,7 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
amdgpu_device_program_register_sequence(adev,
golden_settings_polaris11_a11,
ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -275,15 +278,18 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
case CHIP_FIJI:
chip_name = "fiji";
break;
- case CHIP_POLARIS11:
- chip_name = "polaris11";
- break;
case CHIP_POLARIS10:
chip_name = "polaris10";
break;
+ case CHIP_POLARIS11:
+ chip_name = "polaris11";
+ break;
case CHIP_POLARIS12:
chip_name = "polaris12";
break;
+ case CHIP_VEGAM:
+ chip_name = "vegam";
+ break;
case CHIP_CARRIZO:
chip_name = "carrizo";
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 399f876f9cad..ca53b3fba422 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -42,6 +42,8 @@ MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
@@ -107,6 +109,28 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0)
};
+static const struct soc15_reg_golden golden_settings_sdma_4_2[] =
+{
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
+};
+
static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
{
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
@@ -139,6 +163,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_vg12,
ARRAY_SIZE(golden_settings_sdma_vg12));
break;
+ case CHIP_VEGA20:
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_4_2,
+ ARRAY_SIZE(golden_settings_sdma_4_2));
+ break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
golden_settings_sdma_4_1,
@@ -182,6 +211,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
case CHIP_VEGA12:
chip_name = "vega12";
break;
+ case CHIP_VEGA20:
+ chip_name = "vega20";
+ break;
case CHIP_RAVEN:
chip_name = "raven";
break;
@@ -360,6 +392,31 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
}
+static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring,
+ int mem_space, int hdp,
+ uint32_t addr0, uint32_t addr1,
+ uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ if (mem_space) {
+ /* memory */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ } else {
+ /* registers */
+ amdgpu_ring_write(ring, addr0 << 2);
+ amdgpu_ring_write(ring, addr1 << 2);
+ }
+ amdgpu_ring_write(ring, ref); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
+}
+
/**
* sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
*
@@ -378,15 +435,10 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
else
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
- amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2);
- amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
- amdgpu_ring_write(ring, ref_and_mask); /* reference */
- amdgpu_ring_write(ring, ref_and_mask); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ sdma_v4_0_wait_reg_mem(ring, 0, 1,
+ adev->nbio_funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio_funcs->get_hdp_flush_req_offset(adev),
+ ref_and_mask, ref_and_mask, 10);
}
/**
@@ -1114,16 +1166,10 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
uint64_t addr = ring->fence_drv.gpu_addr;
/* wait for idle */
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
- SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
- amdgpu_ring_write(ring, addr & 0xfffffffc);
- amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
- amdgpu_ring_write(ring, seq); /* reference */
- amdgpu_ring_write(ring, 0xffffffff); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+ sdma_v4_0_wait_reg_mem(ring, 1, 0,
+ addr & 0xfffffffc,
+ upper_32_bits(addr) & 0xffffffff,
+ seq, 0xffffffff, 4);
}
@@ -1154,15 +1200,7 @@ static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
- amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, val); /* reference */
- amdgpu_ring_write(ring, mask); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+ sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
}
static int sdma_v4_0_early_init(void *handle)
@@ -1510,6 +1548,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
sdma_v4_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
@@ -1605,6 +1644,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
.pad_ib = sdma_v4_0_ring_pad_ib,
.emit_wreg = sdma_v4_0_ring_emit_wreg,
.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index a675ec6d2811..c364ef94cc36 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1252,6 +1252,12 @@ static void si_invalidate_hdp(struct amdgpu_device *adev,
}
}
+static bool si_need_full_reset(struct amdgpu_device *adev)
+{
+ /* change this when we support soft reset */
+ return true;
+}
+
static int si_get_pcie_lanes(struct amdgpu_device *adev)
{
u32 link_width_cntl;
@@ -1332,6 +1338,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.get_config_memsize = &si_get_config_memsize,
.flush_hdp = &si_flush_hdp,
.invalidate_hdp = &si_invalidate_hdp,
+ .need_full_reset = &si_need_full_reset,
};
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 797d505bf9ee..b12d7c9d42a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -7580,7 +7580,7 @@ static int si_dpm_late_init(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!amdgpu_dpm)
+ if (!adev->pm.dpm_enabled)
return 0;
ret = si_set_temperature_range(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 51cf8a30f6c2..68b4a22a8892 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -41,8 +41,6 @@
#include "sdma1/sdma1_4_0_offset.h"
#include "hdp/hdp_4_0_offset.h"
#include "hdp/hdp_4_0_sh_mask.h"
-#include "mp/mp_9_0_offset.h"
-#include "mp/mp_9_0_sh_mask.h"
#include "smuio/smuio_9_0_offset.h"
#include "smuio/smuio_9_0_sh_mask.h"
@@ -52,6 +50,8 @@
#include "gmc_v9_0.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
+#include "df_v1_7.h"
+#include "df_v3_6.h"
#include "vega10_ih.h"
#include "sdma_v4_0.h"
#include "uvd_v7_0.h"
@@ -60,33 +60,6 @@
#include "dce_virtual.h"
#include "mxgpu_ai.h"
-#define mmFabricConfigAccessControl 0x0410
-#define mmFabricConfigAccessControl_BASE_IDX 0
-#define mmFabricConfigAccessControl_DEFAULT 0x00000000
-//FabricConfigAccessControl
-#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0
-#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1
-#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10
-#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L
-#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L
-#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L
-
-
-#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc
-#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0
-//DF_PIE_AON0_DfGlobalClkGater
-#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0
-#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL
-
-enum {
- DF_MGCG_DISABLE = 0,
- DF_MGCG_ENABLE_00_CYCLE_DELAY =1,
- DF_MGCG_ENABLE_01_CYCLE_DELAY =2,
- DF_MGCG_ENABLE_15_CYCLE_DELAY =13,
- DF_MGCG_ENABLE_31_CYCLE_DELAY =14,
- DF_MGCG_ENABLE_63_CYCLE_DELAY =15
-};
-
#define mmMP0_MISC_CGTT_CTRL0 0x01b9
#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
#define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba
@@ -313,6 +286,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
+ { SOC15_REG_ENTRY(GC, 0, mmDB_DEBUG2)},
};
static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
@@ -341,6 +315,8 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev,
} else {
if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG))
return adev->gfx.config.gb_addr_config;
+ else if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2))
+ return adev->gfx.config.db_debug2;
return RREG32(reg_offset);
}
}
@@ -512,15 +488,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_RAVEN:
vega10_reg_base_init(adev);
break;
+ case CHIP_VEGA20:
+ vega20_reg_base_init(adev);
+ break;
default:
return -EINVAL;
}
if (adev->flags & AMD_IS_APU)
adev->nbio_funcs = &nbio_v7_0_funcs;
+ else if (adev->asic_type == CHIP_VEGA20)
+ adev->nbio_funcs = &nbio_v7_0_funcs;
else
adev->nbio_funcs = &nbio_v6_1_funcs;
+ if (adev->asic_type == CHIP_VEGA20)
+ adev->df_funcs = &df_v3_6_funcs;
+ else
+ adev->df_funcs = &df_v1_7_funcs;
adev->nbio_funcs->detect_hw_virt(adev);
if (amdgpu_sriov_vf(adev))
@@ -529,12 +514,15 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
- if (!amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->asic_type != CHIP_VEGA20) {
+ amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ }
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -593,6 +581,12 @@ static void soc15_invalidate_hdp(struct amdgpu_device *adev,
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
}
+static bool soc15_need_full_reset(struct amdgpu_device *adev)
+{
+ /* change this when we implement soft reset */
+ return true;
+}
+
static const struct amdgpu_asic_funcs soc15_asic_funcs =
{
.read_disabled_bios = &soc15_read_disabled_bios,
@@ -606,6 +600,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.get_config_memsize = &soc15_get_config_memsize,
.flush_hdp = &soc15_flush_hdp,
.invalidate_hdp = &soc15_invalidate_hdp,
+ .need_full_reset = &soc15_need_full_reset,
};
static int soc15_common_early_init(void *handle)
@@ -675,6 +670,27 @@ static int soc15_common_early_init(void *handle)
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x14;
break;
+ case CHIP_VEGA20:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x28;
+ break;
case CHIP_RAVEN:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
@@ -694,8 +710,15 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_SDMA_MGCG |
- AMD_CG_SUPPORT_SDMA_LS;
- adev->pg_flags = AMD_PG_SUPPORT_SDMA;
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_VCN_MGCG;
+
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
+
+ if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
+ adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_RLC_SMU_HS;
adev->external_rev_id = 0x1;
break;
@@ -871,32 +894,6 @@ static void soc15_update_rom_medium_grain_clock_gating(struct amdgpu_device *ade
WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data);
}
-static void soc15_update_df_medium_grain_clock_gating(struct amdgpu_device *adev,
- bool enable)
-{
- uint32_t data;
-
- /* Put DF on broadcast mode */
- data = RREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl));
- data &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
- WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), data);
-
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
- data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
- data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
- data |= DF_MGCG_ENABLE_15_CYCLE_DELAY;
- WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data);
- } else {
- data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
- data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
- data |= DF_MGCG_DISABLE;
- WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data);
- }
-
- WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl),
- mmFabricConfigAccessControl_DEFAULT);
-}
-
static int soc15_common_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
@@ -908,6 +905,7 @@ static int soc15_common_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
adev->nbio_funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
adev->nbio_funcs->update_medium_grain_light_sleep(adev,
@@ -920,7 +918,7 @@ static int soc15_common_set_clockgating_state(void *handle,
state == AMD_CG_STATE_GATE ? true : false);
soc15_update_rom_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
- soc15_update_df_medium_grain_clock_gating(adev,
+ adev->df_funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
break;
case CHIP_RAVEN:
@@ -973,10 +971,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
*flags |= AMD_CG_SUPPORT_ROM_MGCG;
- /* AMD_CG_SUPPORT_DF_MGCG */
- data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
- if (data & DF_MGCG_ENABLE_15_CYCLE_DELAY)
- *flags |= AMD_CG_SUPPORT_DF_MGCG;
+ adev->df_funcs->get_clockgating_state(adev, flags);
}
static int soc15_common_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index f70da8a29f86..1f714b7af520 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -55,5 +55,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
const u32 array_size);
int vega10_reg_base_init(struct amdgpu_device *adev);
+int vega20_reg_base_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index def865067edd..0942f492d2e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -47,6 +47,21 @@
#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value)
+#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \
+ do { \
+ uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
+ uint32_t loop = adev->usec_timeout; \
+ while ((tmp_ & (mask)) != (expected_value)) { \
+ udelay(2); \
+ tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
+ loop--; \
+ if (!loop) { \
+ ret = -ETIMEDOUT; \
+ break; \
+ } \
+ } \
+ } while (0)
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 7f408f85fdb6..8dc29107228f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -159,6 +159,7 @@
#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */
#define EOP_TCL1_ACTION_EN (1 << 16)
#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
+#define EOP_TC_NC_ACTION_EN (1 << 19)
#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
#define DATA_SEL(x) ((x) << 29)
@@ -268,6 +269,11 @@
* x=1: tmz_end
*/
+#define PACKET3_INVALIDATE_TLBS 0x98
+# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
+# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
+# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
#define PACKET3_SET_RESOURCES 0xA0
/* 1. header
* 2. CONTROL
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 948bb9437757..6fed3d7797a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -93,6 +93,7 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring)
static int uvd_v4_2_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->uvd.num_uvd_inst = 1;
uvd_v4_2_set_ring_funcs(adev);
uvd_v4_2_set_irq_funcs(adev);
@@ -107,7 +108,7 @@ static int uvd_v4_2_sw_init(void *handle)
int r;
/* UVD TRAP */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
if (r)
return r;
@@ -119,9 +120,9 @@ static int uvd_v4_2_sw_init(void *handle)
if (r)
return r;
- ring = &adev->uvd.ring;
+ ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
return r;
}
@@ -150,7 +151,7 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
static int uvd_v4_2_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
@@ -208,7 +209,7 @@ done:
static int uvd_v4_2_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
if (RREG32(mmUVD_STATUS) != 0)
uvd_v4_2_stop(adev);
@@ -251,7 +252,7 @@ static int uvd_v4_2_resume(void *handle)
*/
static int uvd_v4_2_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t rb_bufsz;
int i, j, r;
u32 tmp;
@@ -523,6 +524,18 @@ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, ib->length_dw);
}
+static void uvd_v4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
/**
* uvd_v4_2_mc_resume - memory controller programming
*
@@ -536,7 +549,7 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev)
uint32_t size;
/* programm the VCPU memory controller bits 0-27 */
- addr = (adev->uvd.gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
+ addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
WREG32(mmUVD_VCPU_CACHE_SIZE0, size);
@@ -553,11 +566,11 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev)
WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
/* bits 28-31 */
- addr = (adev->uvd.gpu_addr >> 28) & 0xF;
+ addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF;
WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
/* bits 32-39 */
- addr = (adev->uvd.gpu_addr >> 32) & 0xFF;
+ addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF;
WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
@@ -664,7 +677,7 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_DEBUG("IH: UVD TRAP\n");
- amdgpu_fence_process(&adev->uvd.ring);
+ amdgpu_fence_process(&adev->uvd.inst->ring);
return 0;
}
@@ -688,7 +701,7 @@ static int uvd_v4_2_set_powergating_state(void *handle,
if (state == AMD_PG_STATE_GATE) {
uvd_v4_2_stop(adev);
- if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) {
+ if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) {
if (!(RREG32_SMC(ixCURRENT_PG_STATUS) &
CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) {
WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK |
@@ -699,7 +712,7 @@ static int uvd_v4_2_set_powergating_state(void *handle,
}
return 0;
} else {
- if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) {
+ if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) {
if (RREG32_SMC(ixCURRENT_PG_STATUS) &
CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK |
@@ -732,7 +745,6 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = {
static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
- .nop = PACKET0(mmUVD_NO_OP, 0),
.support_64bit_ptrs = false,
.get_rptr = uvd_v4_2_ring_get_rptr,
.get_wptr = uvd_v4_2_ring_get_wptr,
@@ -745,7 +757,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
.emit_fence = uvd_v4_2_ring_emit_fence,
.test_ring = uvd_v4_2_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = uvd_v4_2_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
@@ -753,7 +765,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
{
- adev->uvd.ring.funcs = &uvd_v4_2_ring_funcs;
+ adev->uvd.inst->ring.funcs = &uvd_v4_2_ring_funcs;
}
static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = {
@@ -763,8 +775,8 @@ static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = {
static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->uvd.irq.num_types = 1;
- adev->uvd.irq.funcs = &uvd_v4_2_irq_funcs;
+ adev->uvd.inst->irq.num_types = 1;
+ adev->uvd.inst->irq.funcs = &uvd_v4_2_irq_funcs;
}
const struct amdgpu_ip_block_version uvd_v4_2_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 6445d55e7d5a..341ee6d55ce8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -89,6 +89,7 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
static int uvd_v5_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->uvd.num_uvd_inst = 1;
uvd_v5_0_set_ring_funcs(adev);
uvd_v5_0_set_irq_funcs(adev);
@@ -103,7 +104,7 @@ static int uvd_v5_0_sw_init(void *handle)
int r;
/* UVD TRAP */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
if (r)
return r;
@@ -115,9 +116,9 @@ static int uvd_v5_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->uvd.ring;
+ ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
return r;
}
@@ -144,7 +145,7 @@ static int uvd_v5_0_sw_fini(void *handle)
static int uvd_v5_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
@@ -204,7 +205,7 @@ done:
static int uvd_v5_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
if (RREG32(mmUVD_STATUS) != 0)
uvd_v5_0_stop(adev);
@@ -253,9 +254,9 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev)
/* programm memory controller bits 0-27 */
WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->uvd.gpu_addr));
+ lower_32_bits(adev->uvd.inst->gpu_addr));
WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->uvd.gpu_addr));
+ upper_32_bits(adev->uvd.inst->gpu_addr));
offset = AMDGPU_UVD_FIRMWARE_OFFSET;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
@@ -287,7 +288,7 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev)
*/
static int uvd_v5_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
uint32_t mp_swap_cntl;
@@ -540,6 +541,18 @@ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, ib->length_dw);
}
+static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
static bool uvd_v5_0_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -586,7 +599,7 @@ static int uvd_v5_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_DEBUG("IH: UVD TRAP\n");
- amdgpu_fence_process(&adev->uvd.ring);
+ amdgpu_fence_process(&adev->uvd.inst->ring);
return 0;
}
@@ -840,7 +853,6 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = {
static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
- .nop = PACKET0(mmUVD_NO_OP, 0),
.support_64bit_ptrs = false,
.get_rptr = uvd_v5_0_ring_get_rptr,
.get_wptr = uvd_v5_0_ring_get_wptr,
@@ -853,7 +865,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
.emit_fence = uvd_v5_0_ring_emit_fence,
.test_ring = uvd_v5_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = uvd_v5_0_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
@@ -861,7 +873,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
{
- adev->uvd.ring.funcs = &uvd_v5_0_ring_funcs;
+ adev->uvd.inst->ring.funcs = &uvd_v5_0_ring_funcs;
}
static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = {
@@ -871,8 +883,8 @@ static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = {
static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->uvd.irq.num_types = 1;
- adev->uvd.irq.funcs = &uvd_v5_0_irq_funcs;
+ adev->uvd.inst->irq.num_types = 1;
+ adev->uvd.inst->irq.funcs = &uvd_v5_0_irq_funcs;
}
const struct amdgpu_ip_block_version uvd_v5_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index f26f515db2fb..bfddf97dd13e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -62,7 +62,7 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev)
{
return ((adev->asic_type >= CHIP_POLARIS10) &&
- (adev->asic_type <= CHIP_POLARIS12) &&
+ (adev->asic_type <= CHIP_VEGAM) &&
(!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16));
}
@@ -91,7 +91,7 @@ static uint64_t uvd_v6_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->uvd.ring_enc[0])
+ if (ring == &adev->uvd.inst->ring_enc[0])
return RREG32(mmUVD_RB_RPTR);
else
return RREG32(mmUVD_RB_RPTR2);
@@ -121,7 +121,7 @@ static uint64_t uvd_v6_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->uvd.ring_enc[0])
+ if (ring == &adev->uvd.inst->ring_enc[0])
return RREG32(mmUVD_RB_WPTR);
else
return RREG32(mmUVD_RB_WPTR2);
@@ -152,7 +152,7 @@ static void uvd_v6_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->uvd.ring_enc[0])
+ if (ring == &adev->uvd.inst->ring_enc[0])
WREG32(mmUVD_RB_WPTR,
lower_32_bits(ring->wptr));
else
@@ -375,6 +375,7 @@ error:
static int uvd_v6_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->uvd.num_uvd_inst = 1;
if (!(adev->flags & AMD_IS_APU) &&
(RREG32_SMC(ixCC_HARVEST_FUSES) & CC_HARVEST_FUSES__UVD_DISABLE_MASK))
@@ -399,14 +400,14 @@ static int uvd_v6_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* UVD TRAP */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
if (r)
return r;
/* UVD ENC TRAP */
if (uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.inst->irq);
if (r)
return r;
}
@@ -418,18 +419,18 @@ static int uvd_v6_0_sw_init(void *handle)
if (!uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i)
- adev->uvd.ring_enc[i].funcs = NULL;
+ adev->uvd.inst->ring_enc[i].funcs = NULL;
- adev->uvd.irq.num_types = 1;
+ adev->uvd.inst->irq.num_types = 1;
adev->uvd.num_enc_rings = 0;
DRM_INFO("UVD ENC is disabled\n");
} else {
struct drm_sched_rq *rq;
- ring = &adev->uvd.ring_enc[0];
+ ring = &adev->uvd.inst->ring_enc[0];
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
- rq, amdgpu_sched_jobs, NULL);
+ r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc,
+ rq, NULL);
if (r) {
DRM_ERROR("Failed setting up UVD ENC run queue.\n");
return r;
@@ -440,17 +441,17 @@ static int uvd_v6_0_sw_init(void *handle)
if (r)
return r;
- ring = &adev->uvd.ring;
+ ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
if (r)
return r;
if (uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- ring = &adev->uvd.ring_enc[i];
+ ring = &adev->uvd.inst->ring_enc[i];
sprintf(ring->name, "uvd_enc%d", i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
if (r)
return r;
}
@@ -469,10 +470,10 @@ static int uvd_v6_0_sw_fini(void *handle)
return r;
if (uvd_v6_0_enc_support(adev)) {
- drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
+ drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc);
for (i = 0; i < adev->uvd.num_enc_rings; ++i)
- amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
+ amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);
}
return amdgpu_uvd_sw_fini(adev);
@@ -488,7 +489,7 @@ static int uvd_v6_0_sw_fini(void *handle)
static int uvd_v6_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int i, r;
@@ -532,7 +533,7 @@ static int uvd_v6_0_hw_init(void *handle)
if (uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- ring = &adev->uvd.ring_enc[i];
+ ring = &adev->uvd.inst->ring_enc[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
@@ -563,7 +564,7 @@ done:
static int uvd_v6_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
if (RREG32(mmUVD_STATUS) != 0)
uvd_v6_0_stop(adev);
@@ -611,9 +612,9 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev)
/* programm memory controller bits 0-27 */
WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->uvd.gpu_addr));
+ lower_32_bits(adev->uvd.inst->gpu_addr));
WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->uvd.gpu_addr));
+ upper_32_bits(adev->uvd.inst->gpu_addr));
offset = AMDGPU_UVD_FIRMWARE_OFFSET;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
@@ -726,7 +727,7 @@ static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev,
*/
static int uvd_v6_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
uint32_t mp_swap_cntl;
@@ -866,14 +867,14 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0);
if (uvd_v6_0_enc_support(adev)) {
- ring = &adev->uvd.ring_enc[0];
+ ring = &adev->uvd.inst->ring_enc[0];
WREG32(mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
WREG32(mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32(mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32(mmUVD_RB_SIZE, ring->ring_size / 4);
- ring = &adev->uvd.ring_enc[1];
+ ring = &adev->uvd.inst->ring_enc[1];
WREG32(mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32(mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
WREG32(mmUVD_RB_BASE_LO2, ring->gpu_addr);
@@ -964,6 +965,16 @@ static void uvd_v6_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
}
/**
+ * uvd_v6_0_ring_emit_hdp_flush - skip HDP flushing
+ *
+ * @ring: amdgpu_ring pointer
+ */
+static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ /* The firmware doesn't seem to like touching registers at this point. */
+}
+
+/**
* uvd_v6_0_ring_test_ring - register write test
*
* @ring: amdgpu_ring pointer
@@ -1089,6 +1100,18 @@ static void uvd_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0xE);
}
+static void uvd_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
static void uvd_v6_0_enc_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
uint32_t seq = ring->fence_drv.sync_seq;
@@ -1148,10 +1171,10 @@ static bool uvd_v6_0_check_soft_reset(void *handle)
srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
if (srbm_soft_reset) {
- adev->uvd.srbm_soft_reset = srbm_soft_reset;
+ adev->uvd.inst->srbm_soft_reset = srbm_soft_reset;
return true;
} else {
- adev->uvd.srbm_soft_reset = 0;
+ adev->uvd.inst->srbm_soft_reset = 0;
return false;
}
}
@@ -1160,7 +1183,7 @@ static int uvd_v6_0_pre_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst->srbm_soft_reset)
return 0;
uvd_v6_0_stop(adev);
@@ -1172,9 +1195,9 @@ static int uvd_v6_0_soft_reset(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 srbm_soft_reset;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst->srbm_soft_reset)
return 0;
- srbm_soft_reset = adev->uvd.srbm_soft_reset;
+ srbm_soft_reset = adev->uvd.inst->srbm_soft_reset;
if (srbm_soft_reset) {
u32 tmp;
@@ -1202,7 +1225,7 @@ static int uvd_v6_0_post_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst->srbm_soft_reset)
return 0;
mdelay(5);
@@ -1228,17 +1251,17 @@ static int uvd_v6_0_process_interrupt(struct amdgpu_device *adev,
switch (entry->src_id) {
case 124:
- amdgpu_fence_process(&adev->uvd.ring);
+ amdgpu_fence_process(&adev->uvd.inst->ring);
break;
case 119:
if (likely(uvd_v6_0_enc_support(adev)))
- amdgpu_fence_process(&adev->uvd.ring_enc[0]);
+ amdgpu_fence_process(&adev->uvd.inst->ring_enc[0]);
else
int_handled = false;
break;
case 120:
if (likely(uvd_v6_0_enc_support(adev)))
- amdgpu_fence_process(&adev->uvd.ring_enc[1]);
+ amdgpu_fence_process(&adev->uvd.inst->ring_enc[1]);
else
int_handled = false;
break;
@@ -1521,22 +1544,22 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
- .nop = PACKET0(mmUVD_NO_OP, 0),
.support_64bit_ptrs = false,
.get_rptr = uvd_v6_0_ring_get_rptr,
.get_wptr = uvd_v6_0_ring_get_wptr,
.set_wptr = uvd_v6_0_ring_set_wptr,
.parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_frame_size =
- 6 + 6 + /* hdp flush / invalidate */
+ 6 + /* hdp invalidate */
10 + /* uvd_v6_0_ring_emit_pipeline_sync */
14, /* uvd_v6_0_ring_emit_fence x1 no user fence */
.emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
.emit_ib = uvd_v6_0_ring_emit_ib,
.emit_fence = uvd_v6_0_ring_emit_fence,
+ .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
.test_ring = uvd_v6_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = uvd_v6_0_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
@@ -1552,7 +1575,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
.get_wptr = uvd_v6_0_ring_get_wptr,
.set_wptr = uvd_v6_0_ring_set_wptr,
.emit_frame_size =
- 6 + 6 + /* hdp flush / invalidate */
+ 6 + /* hdp invalidate */
10 + /* uvd_v6_0_ring_emit_pipeline_sync */
VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */
14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */
@@ -1561,6 +1584,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
.emit_fence = uvd_v6_0_ring_emit_fence,
.emit_vm_flush = uvd_v6_0_ring_emit_vm_flush,
.emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync,
+ .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
.test_ring = uvd_v6_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
@@ -1600,10 +1624,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
{
if (adev->asic_type >= CHIP_POLARIS10) {
- adev->uvd.ring.funcs = &uvd_v6_0_ring_vm_funcs;
+ adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_vm_funcs;
DRM_INFO("UVD is enabled in VM mode\n");
} else {
- adev->uvd.ring.funcs = &uvd_v6_0_ring_phys_funcs;
+ adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_phys_funcs;
DRM_INFO("UVD is enabled in physical mode\n");
}
}
@@ -1613,7 +1637,7 @@ static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->uvd.num_enc_rings; ++i)
- adev->uvd.ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs;
+ adev->uvd.inst->ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs;
DRM_INFO("UVD ENC is enabled in VM mode\n");
}
@@ -1626,11 +1650,11 @@ static const struct amdgpu_irq_src_funcs uvd_v6_0_irq_funcs = {
static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev)
{
if (uvd_v6_0_enc_support(adev))
- adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1;
+ adev->uvd.inst->irq.num_types = adev->uvd.num_enc_rings + 1;
else
- adev->uvd.irq.num_types = 1;
+ adev->uvd.inst->irq.num_types = 1;
- adev->uvd.irq.funcs = &uvd_v6_0_irq_funcs;
+ adev->uvd.inst->irq.funcs = &uvd_v6_0_irq_funcs;
}
const struct amdgpu_ip_block_version uvd_v6_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index eddc57f3b72a..57d32f21b3a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -40,6 +40,8 @@
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"
+#define UVD7_MAX_HW_INSTANCES_VEGA20 2
+
static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -47,6 +49,11 @@ static int uvd_v7_0_start(struct amdgpu_device *adev);
static void uvd_v7_0_stop(struct amdgpu_device *adev);
static int uvd_v7_0_sriov_start(struct amdgpu_device *adev);
+static int amdgpu_ih_clientid_uvds[] = {
+ SOC15_IH_CLIENTID_UVD,
+ SOC15_IH_CLIENTID_UVD1
+};
+
/**
* uvd_v7_0_ring_get_rptr - get read pointer
*
@@ -58,7 +65,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
}
/**
@@ -72,10 +79,10 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->uvd.ring_enc[0])
- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
+ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
else
- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
}
/**
@@ -89,7 +96,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
}
/**
@@ -106,10 +113,10 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
- if (ring == &adev->uvd.ring_enc[0])
- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
else
- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
}
/**
@@ -123,7 +130,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
/**
@@ -144,11 +151,11 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
return;
}
- if (ring == &adev->uvd.ring_enc[0])
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,
+ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR,
lower_32_bits(ring->wptr));
else
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2,
+ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2,
lower_32_bits(ring->wptr));
}
@@ -170,8 +177,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 16);
if (r) {
- DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n",
- ring->idx, r);
+ DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n",
+ ring->me, ring->idx, r);
return r;
}
amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
@@ -184,11 +191,11 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
}
if (i < adev->usec_timeout) {
- DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
- ring->idx, i);
+ DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
+ ring->me, ring->idx, i);
} else {
- DRM_ERROR("amdgpu: ring %d test failed\n",
- ring->idx);
+ DRM_ERROR("amdgpu: (%d)ring %d test failed\n",
+ ring->me, ring->idx);
r = -ETIMEDOUT;
}
@@ -342,24 +349,24 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
if (r) {
- DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r);
goto error;
}
r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence);
if (r) {
- DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r);
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out.\n");
+ DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me);
r = -ETIMEDOUT;
} else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r);
} else {
- DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+ DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx);
r = 0;
}
error:
@@ -370,6 +377,10 @@ error:
static int uvd_v7_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->asic_type == CHIP_VEGA20)
+ adev->uvd.num_uvd_inst = UVD7_MAX_HW_INSTANCES_VEGA20;
+ else
+ adev->uvd.num_uvd_inst = 1;
if (amdgpu_sriov_vf(adev))
adev->uvd.num_enc_rings = 1;
@@ -386,19 +397,21 @@ static int uvd_v7_0_sw_init(void *handle)
{
struct amdgpu_ring *ring;
struct drm_sched_rq *rq;
- int i, r;
+ int i, j, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- /* UVD TRAP */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.irq);
- if (r)
- return r;
-
- /* UVD ENC TRAP */
- for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq);
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ /* UVD TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], 124, &adev->uvd.inst[j].irq);
if (r)
return r;
+
+ /* UVD ENC TRAP */
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + 119, &adev->uvd.inst[j].irq);
+ if (r)
+ return r;
+ }
}
r = amdgpu_uvd_sw_init(adev);
@@ -415,43 +428,48 @@ static int uvd_v7_0_sw_init(void *handle)
DRM_INFO("PSP loading UVD firmware\n");
}
- ring = &adev->uvd.ring_enc[0];
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
- rq, amdgpu_sched_jobs, NULL);
- if (r) {
- DRM_ERROR("Failed setting up UVD ENC run queue.\n");
- return r;
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ ring = &adev->uvd.inst[j].ring_enc[0];
+ rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+ r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc,
+ rq, NULL);
+ if (r) {
+ DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j);
+ return r;
+ }
}
r = amdgpu_uvd_resume(adev);
if (r)
return r;
- if (!amdgpu_sriov_vf(adev)) {
- ring = &adev->uvd.ring;
- sprintf(ring->name, "uvd");
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
- if (r)
- return r;
- }
- for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- ring = &adev->uvd.ring_enc[i];
- sprintf(ring->name, "uvd_enc%d", i);
- if (amdgpu_sriov_vf(adev)) {
- ring->use_doorbell = true;
-
- /* currently only use the first enconding ring for
- * sriov, so set unused location for other unused rings.
- */
- if (i == 0)
- ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
- else
- ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ if (!amdgpu_sriov_vf(adev)) {
+ ring = &adev->uvd.inst[j].ring;
+ sprintf(ring->name, "uvd<%d>", j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ ring = &adev->uvd.inst[j].ring_enc[i];
+ sprintf(ring->name, "uvd_enc%d<%d>", i, j);
+ if (amdgpu_sriov_vf(adev)) {
+ ring->use_doorbell = true;
+
+ /* currently only use the first enconding ring for
+ * sriov, so set unused location for other unused rings.
+ */
+ if (i == 0)
+ ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
+ else
+ ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
+ }
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
+ if (r)
+ return r;
}
- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
- if (r)
- return r;
}
r = amdgpu_virt_alloc_mm_table(adev);
@@ -463,7 +481,7 @@ static int uvd_v7_0_sw_init(void *handle)
static int uvd_v7_0_sw_fini(void *handle)
{
- int i, r;
+ int i, j, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_virt_free_mm_table(adev);
@@ -472,11 +490,12 @@ static int uvd_v7_0_sw_fini(void *handle)
if (r)
return r;
- drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
-
- for (i = 0; i < adev->uvd.num_enc_rings; ++i)
- amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc);
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i)
+ amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+ }
return amdgpu_uvd_sw_fini(adev);
}
@@ -490,9 +509,9 @@ static int uvd_v7_0_sw_fini(void *handle)
static int uvd_v7_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring;
uint32_t tmp;
- int i, r;
+ int i, j, r;
if (amdgpu_sriov_vf(adev))
r = uvd_v7_0_sriov_start(adev);
@@ -501,57 +520,60 @@ static int uvd_v7_0_hw_init(void *handle)
if (r)
goto done;
- if (!amdgpu_sriov_vf(adev)) {
- ring->ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->ready = false;
- goto done;
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ ring = &adev->uvd.inst[j].ring;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ ring->ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->ready = false;
+ goto done;
+ }
+
+ r = amdgpu_ring_alloc(ring, 10);
+ if (r) {
+ DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r);
+ goto done;
+ }
+
+ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ /* Clear timeout status bits */
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_TIMEOUT_STATUS), 0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
+ mmUVD_SEMA_CNTL), 0));
+ amdgpu_ring_write(ring, 3);
+
+ amdgpu_ring_commit(ring);
}
- r = amdgpu_ring_alloc(ring, 10);
- if (r) {
- DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
- goto done;
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ ring = &adev->uvd.inst[j].ring_enc[i];
+ ring->ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->ready = false;
+ goto done;
+ }
}
-
- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
- mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
- amdgpu_ring_write(ring, tmp);
- amdgpu_ring_write(ring, 0xFFFFF);
-
- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
- mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
- amdgpu_ring_write(ring, tmp);
- amdgpu_ring_write(ring, 0xFFFFF);
-
- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
- mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
- amdgpu_ring_write(ring, tmp);
- amdgpu_ring_write(ring, 0xFFFFF);
-
- /* Clear timeout status bits */
- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
- mmUVD_SEMA_TIMEOUT_STATUS), 0));
- amdgpu_ring_write(ring, 0x8);
-
- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
- mmUVD_SEMA_CNTL), 0));
- amdgpu_ring_write(ring, 3);
-
- amdgpu_ring_commit(ring);
}
-
- for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- ring = &adev->uvd.ring_enc[i];
- ring->ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->ready = false;
- goto done;
- }
- }
-
done:
if (!r)
DRM_INFO("UVD and UVD ENC initialized successfully.\n");
@@ -569,7 +591,7 @@ done:
static int uvd_v7_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ int i;
if (!amdgpu_sriov_vf(adev))
uvd_v7_0_stop(adev);
@@ -578,7 +600,8 @@ static int uvd_v7_0_hw_fini(void *handle)
DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
}
- ring->ready = false;
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i)
+ adev->uvd.inst[i].ring.ready = false;
return 0;
}
@@ -618,48 +641,51 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
{
uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
uint32_t offset;
+ int i;
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
- offset = 0;
- } else {
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->uvd.gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->uvd.gpu_addr));
- offset = size;
- }
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+ offset = 0;
+ } else {
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->uvd.inst[i].gpu_addr));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->uvd.inst[i].gpu_addr));
+ offset = size;
+ }
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
-
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
- lower_32_bits(adev->uvd.gpu_addr + offset));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
- upper_32_bits(adev->uvd.gpu_addr + offset));
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
-
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
- lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
- upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,
- AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
-
- WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
+
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2,
+ AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+ WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
+ }
}
static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
@@ -669,6 +695,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
uint64_t addr = table->gpu_addr;
struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
uint32_t size;
+ int i;
size = header->header_size + header->vce_table_size + header->uvd_table_size;
@@ -688,11 +715,12 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
/* 4, set resp to zero */
WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0);
- WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0);
- adev->wb.wb[adev->uvd.ring_enc[0].wptr_offs] = 0;
- adev->uvd.ring_enc[0].wptr = 0;
- adev->uvd.ring_enc[0].wptr_old = 0;
-
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);
+ adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0;
+ adev->uvd.inst[i].ring_enc[0].wptr = 0;
+ adev->uvd.inst[i].ring_enc[0].wptr_old = 0;
+ }
/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001);
@@ -725,6 +753,7 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
struct mmsch_v1_0_cmd_end end = { {0} };
uint32_t *init_table = adev->virt.mm_table.cpu_addr;
struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
+ uint8_t i = 0;
direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
@@ -742,120 +771,121 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
init_table += header->uvd_table_offset;
- ring = &adev->uvd.ring;
- ring->wptr = 0;
- size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
-
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
- 0xFFFFFFFF, 0x00000004);
- /* mc resume*/
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
- lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
- upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
- offset = 0;
- } else {
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
- lower_32_bits(adev->uvd.gpu_addr));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
- upper_32_bits(adev->uvd.gpu_addr));
- offset = size;
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ ring = &adev->uvd.inst[i].ring;
+ ring->wptr = 0;
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
+
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ 0xFFFFFFFF, 0x00000004);
+ /* mc resume*/
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+ offset = 0;
+ } else {
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->uvd.inst[i].gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->uvd.inst[i].gpu_addr));
+ offset = size;
+ }
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size);
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
+ /* mc resume end*/
+
+ /* disable clock gating */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL),
+ ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
+
+ /* disable interupt */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+ ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+
+ /* stall UMC and register bus before resetting VCPU */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
+ (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
+
+ /* initialize UVD memory controller */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL),
+ (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ 0x00100000L));
+
+ /* take all subblocks out of reset, except VCPU */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ /* enable VCPU clock */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* enable master interrupt */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+ ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+ (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+ /* clear the bit 4 of UVD_STATUS */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
+
+ /* force RBC into idle state */
+ size = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+
+ ring = &adev->uvd.inst[i].ring_enc[0];
+ ring->wptr = 0;
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr);
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 4);
+
+ /* boot up the VCPU */
+ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0);
+
+ /* enable UMC */
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
+
+ MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02);
}
-
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size);
-
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
- lower_32_bits(adev->uvd.gpu_addr + offset));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
- upper_32_bits(adev->uvd.gpu_addr + offset));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
-
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
- lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
- upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
- AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
-
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
- /* mc resume end*/
-
- /* disable clock gating */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL),
- ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
-
- /* disable interupt */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
- ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
-
- /* stall UMC and register bus before resetting VCPU */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
- /* put LMI, VCPU, RBC etc... into reset */
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
- (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
- UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
- UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
- UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
- UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
- UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
- UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
-
- /* initialize UVD memory controller */
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL),
- (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__REQ_MODE_MASK |
- 0x00100000L));
-
- /* take all subblocks out of reset, except VCPU */
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-
- /* enable VCPU clock */
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* enable master interrupt */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
- ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
- (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
-
- /* clear the bit 4 of UVD_STATUS */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
-
- /* force RBC into idle state */
- size = order_base_2(ring->ring_size);
- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);
-
- ring = &adev->uvd.ring_enc[0];
- ring->wptr = 0;
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr);
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4);
-
- /* boot up the VCPU */
- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0);
-
- /* enable UMC */
- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
-
- MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02);
-
/* add end packet */
memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
@@ -874,15 +904,17 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
*/
static int uvd_v7_0_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = &adev->uvd.ring;
+ struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
uint32_t mp_swap_cntl;
- int i, j, r;
+ int i, j, k, r;
- /* disable DPG */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
- ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+ /* disable DPG */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ }
/* disable byte swapping */
lmi_swap_cntl = 0;
@@ -890,157 +922,159 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
uvd_v7_0_mc_resume(adev);
- /* disable clock gating */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 0,
- ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
-
- /* disable interupt */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* stall UMC and register bus before resetting VCPU */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- mdelay(1);
-
- /* put LMI, VCPU, RBC etc... into reset */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
- UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
- UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
- UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
- UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
- UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
- UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
- UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
- mdelay(5);
+ for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+ ring = &adev->uvd.inst[k].ring;
+ /* disable clock gating */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0,
+ ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
- /* initialize UVD memory controller */
- WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL,
- (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__REQ_MODE_MASK |
- 0x00100000L);
+ /* disable interupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* stall UMC and register bus before resetting VCPU */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2),
+ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ mdelay(1);
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
+ UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* initialize UVD memory controller */
+ WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL,
+ (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ 0x00100000L);
#ifdef __BIG_ENDIAN
- /* swap (8 in 32) RB and IB */
- lmi_swap_cntl = 0xa;
- mp_swap_cntl = 0;
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+ mp_swap_cntl = 0;
#endif
- WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
- WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
-
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88);
-
- /* take all subblocks out of reset, except VCPU */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
- mdelay(5);
+ WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+ WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
- /* enable VCPU clock */
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL,
- UVD_VCPU_CNTL__CLK_EN_MASK);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0);
+ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88);
- /* enable UMC */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ /* take all subblocks out of reset, except VCPU */
+ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(5);
- /* boot up the VCPU */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0);
- mdelay(10);
+ /* enable VCPU clock */
+ WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL,
+ UVD_VCPU_CNTL__CLK_EN_MASK);
- for (i = 0; i < 10; ++i) {
- uint32_t status;
+ /* enable UMC */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- for (j = 0; j < 100; ++j) {
- status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+ /* boot up the VCPU */
+ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0);
+ mdelay(10);
+
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
+
+ for (j = 0; j < 100; ++j) {
+ status = RREG32_SOC15(UVD, k, mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
if (status & 2)
break;
+
+ DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k);
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ r = -1;
}
- r = 0;
- if (status & 2)
- break;
-
- DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
- ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
- ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
- mdelay(10);
- r = -1;
- }
-
- if (r) {
- DRM_ERROR("UVD not responding, giving up!!!\n");
- return r;
- }
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
- (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
- ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
-
- /* clear the bit 4 of UVD_STATUS */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
-
- /* force RBC into idle state */
- rb_bufsz = order_base_2(ring->ring_size);
- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
-
- /* set the write pointer delay */
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
-
- /* set the wb address */
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
- (upper_32_bits(ring->gpu_addr) >> 2));
-
- /* programm the RB_BASE for ring buffer */
- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
-
- /* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
-
- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
- lower_32_bits(ring->wptr));
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
- ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
-
- ring = &adev->uvd.ring_enc[0];
- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+ if (r) {
+ DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k);
+ return r;
+ }
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN),
+ (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+ ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
- ring = &adev->uvd.ring_enc[1];
- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ /* clear the bit 4 of UVD_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* programm the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0,
+ ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
+
+ ring = &adev->uvd.inst[k].ring_enc[0];
+ WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->uvd.inst[k].ring_enc[1];
+ WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ }
return 0;
}
@@ -1053,26 +1087,30 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
*/
static void uvd_v7_0_stop(struct amdgpu_device *adev)
{
- /* force RBC into idle state */
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101);
-
- /* Stall UMC and register bus before resetting VCPU */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- mdelay(1);
-
- /* put VCPU into reset */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
- mdelay(5);
+ uint8_t i = 0;
- /* disable VCPU clock */
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0);
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ /* force RBC into idle state */
+ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101);
- /* Unstall UMC and register bus */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ mdelay(1);
+
+ /* put VCPU into reset */
+ WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET,
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(5);
+
+ /* disable VCPU clock */
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0);
+
+ /* Unstall UMC and register bus */
+ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ }
}
/**
@@ -1091,26 +1129,26 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
amdgpu_ring_write(ring, seq);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
amdgpu_ring_write(ring, addr & 0xffffffff);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
amdgpu_ring_write(ring, 2);
}
@@ -1136,6 +1174,16 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
}
/**
+ * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing
+ *
+ * @ring: amdgpu_ring pointer
+ */
+static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ /* The firmware doesn't seem to like touching registers at this point. */
+}
+
+/**
* uvd_v7_0_ring_test_ring - register write test
*
* @ring: amdgpu_ring pointer
@@ -1149,30 +1197,30 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
- WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
+ WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
- ring->idx, r);
+ DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n",
+ ring->me, ring->idx, r);
return r;
}
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID);
+ tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID);
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
- DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
- ring->idx, i);
+ DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
+ ring->me, ring->idx, i);
} else {
- DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
- ring->idx, tmp);
+ DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n",
+ ring->me, ring->idx, tmp);
r = -EINVAL;
}
return r;
@@ -1193,17 +1241,17 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
amdgpu_ring_write(ring, vmid);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0));
amdgpu_ring_write(ring, ib->length_dw);
}
@@ -1231,13 +1279,13 @@ static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
amdgpu_ring_write(ring, reg << 2);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
amdgpu_ring_write(ring, val);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
amdgpu_ring_write(ring, 8);
}
@@ -1247,16 +1295,16 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
struct amdgpu_device *adev = ring->adev;
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
amdgpu_ring_write(ring, reg << 2);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
amdgpu_ring_write(ring, val);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0));
amdgpu_ring_write(ring, mask);
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
amdgpu_ring_write(ring, 12);
}
@@ -1277,12 +1325,15 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
- int i;
struct amdgpu_device *adev = ring->adev;
+ int i;
- for (i = 0; i < count; i++)
- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
+ WARN_ON(ring->wptr % 2 || count % 2);
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0));
+ amdgpu_ring_write(ring, 0);
+ }
}
static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
@@ -1349,16 +1400,16 @@ static bool uvd_v7_0_check_soft_reset(void *handle)
if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
- (RREG32_SOC15(UVD, 0, mmUVD_STATUS) &
+ (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
AMDGPU_UVD_STATUS_BUSY_MASK))
srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
if (srbm_soft_reset) {
- adev->uvd.srbm_soft_reset = srbm_soft_reset;
+ adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
return true;
} else {
- adev->uvd.srbm_soft_reset = 0;
+ adev->uvd.inst[ring->me].srbm_soft_reset = 0;
return false;
}
}
@@ -1367,7 +1418,7 @@ static int uvd_v7_0_pre_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
return 0;
uvd_v7_0_stop(adev);
@@ -1379,9 +1430,9 @@ static int uvd_v7_0_soft_reset(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 srbm_soft_reset;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
return 0;
- srbm_soft_reset = adev->uvd.srbm_soft_reset;
+ srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
if (srbm_soft_reset) {
u32 tmp;
@@ -1409,7 +1460,7 @@ static int uvd_v7_0_post_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!adev->uvd.srbm_soft_reset)
+ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
return 0;
mdelay(5);
@@ -1431,17 +1482,32 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_UVD:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_UVD1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
DRM_DEBUG("IH: UVD TRAP\n");
+
switch (entry->src_id) {
case 124:
- amdgpu_fence_process(&adev->uvd.ring);
+ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring);
break;
case 119:
- amdgpu_fence_process(&adev->uvd.ring_enc[0]);
+ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]);
break;
case 120:
if (!amdgpu_sriov_vf(adev))
- amdgpu_fence_process(&adev->uvd.ring_enc[1]);
+ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -1457,9 +1523,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
{
uint32_t data, data1, data2, suvd_flags;
- data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL);
- data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
- data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL);
+ data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
+ data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
+ data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
@@ -1503,18 +1569,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
data1 |= suvd_flags;
- WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data);
- WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0);
- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2);
+ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
+ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
+ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
+ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
}
static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
{
uint32_t data, data1, cgc_flags, suvd_flags;
- data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE);
- data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
+ data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
+ data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
cgc_flags = UVD_CGC_GATE__SYS_MASK |
UVD_CGC_GATE__UDEC_MASK |
@@ -1546,8 +1612,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
data |= cgc_flags;
data1 |= suvd_flags;
- WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data);
- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
+ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
+ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
}
static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
@@ -1606,7 +1672,7 @@ static int uvd_v7_0_set_powergating_state(void *handle,
if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
return 0;
- WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
+ WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
if (state == AMD_PG_STATE_GATE) {
uvd_v7_0_stop(adev);
@@ -1647,14 +1713,13 @@ const struct amd_ip_funcs uvd_v7_0_ip_funcs = {
static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
- .nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
.vmhub = AMDGPU_MMHUB,
.get_rptr = uvd_v7_0_ring_get_rptr,
.get_wptr = uvd_v7_0_ring_get_wptr,
.set_wptr = uvd_v7_0_ring_set_wptr,
.emit_frame_size =
- 6 + 6 + /* hdp flush / invalidate */
+ 6 + /* hdp invalidate */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
8 + /* uvd_v7_0_ring_emit_vm_flush */
@@ -1663,6 +1728,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.emit_ib = uvd_v7_0_ring_emit_ib,
.emit_fence = uvd_v7_0_ring_emit_fence,
.emit_vm_flush = uvd_v7_0_ring_emit_vm_flush,
+ .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush,
.test_ring = uvd_v7_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
.insert_nop = uvd_v7_0_ring_insert_nop,
@@ -1671,6 +1737,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.end_use = amdgpu_uvd_ring_end_use,
.emit_wreg = uvd_v7_0_ring_emit_wreg,
.emit_reg_wait = uvd_v7_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
@@ -1702,22 +1769,32 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
.end_use = amdgpu_uvd_ring_end_use,
.emit_wreg = uvd_v7_0_enc_ring_emit_wreg,
.emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
{
- adev->uvd.ring.funcs = &uvd_v7_0_ring_vm_funcs;
- DRM_INFO("UVD is enabled in VM mode\n");
+ int i;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs;
+ adev->uvd.inst[i].ring.me = i;
+ DRM_INFO("UVD(%d) is enabled in VM mode\n", i);
+ }
}
static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
- int i;
+ int i, j;
- for (i = 0; i < adev->uvd.num_enc_rings; ++i)
- adev->uvd.ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+ adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
+ adev->uvd.inst[j].ring_enc[i].me = j;
+ }
- DRM_INFO("UVD ENC is enabled in VM mode\n");
+ DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j);
+ }
}
static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
@@ -1727,8 +1804,12 @@ static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1;
- adev->uvd.irq.funcs = &uvd_v7_0_irq_funcs;
+ int i;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1;
+ adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs;
+ }
}
const struct amdgpu_ip_block_version uvd_v7_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 428d1928e44e..0999c843f623 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -388,7 +388,8 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
default:
if ((adev->asic_type == CHIP_POLARIS10) ||
(adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12))
+ (adev->asic_type == CHIP_POLARIS12) ||
+ (adev->asic_type == CHIP_VEGAM))
return AMDGPU_VCE_HARVEST_VCE1;
return 0;
@@ -467,8 +468,8 @@ static int vce_v3_0_hw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
vce_v3_0_override_vce_clock_gating(adev, true);
- if (!(adev->flags & AMD_IS_APU))
- amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
+
+ amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
for (i = 0; i < adev->vce.num_rings; i++)
adev->vce.ring[i].ready = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 73fd48d6c756..8fd1b742985a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -1081,6 +1081,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.end_use = amdgpu_vce_ring_end_use,
.emit_wreg = vce_v4_0_emit_wreg,
.emit_reg_wait = vce_v4_0_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 8c132673bc79..110b294ebed3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -35,7 +35,6 @@
#include "mmhub/mmhub_9_1_offset.h"
#include "mmhub/mmhub_9_1_sh_mask.h"
-static int vcn_v1_0_start(struct amdgpu_device *adev);
static int vcn_v1_0_stop(struct amdgpu_device *adev);
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
@@ -146,10 +145,6 @@ static int vcn_v1_0_hw_init(void *handle)
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
int i, r;
- r = vcn_v1_0_start(adev);
- if (r)
- goto done;
-
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r) {
@@ -185,11 +180,9 @@ static int vcn_v1_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
- int r;
- r = vcn_v1_0_stop(adev);
- if (r)
- return r;
+ if (RREG32_SOC15(VCN, 0, mmUVD_STATUS))
+ vcn_v1_0_stop(adev);
ring->ready = false;
@@ -288,14 +281,14 @@ static void vcn_v1_0_mc_resume(struct amdgpu_device *adev)
*
* Disable clock gating for VCN block
*/
-static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw)
+static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
{
uint32_t data;
/* JPEG disable CGC */
data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
- if (sw)
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
@@ -310,7 +303,7 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw)
/* UVD disable CGC */
data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
- if (sw)
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
@@ -415,13 +408,13 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw)
*
* Enable clock gating for VCN block
*/
-static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw)
+static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
/* enable JPEG CGC */
data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
- if (sw)
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
@@ -435,7 +428,7 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw)
/* enable UVD CGC */
data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
- if (sw)
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
@@ -480,6 +473,94 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
+static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+ int ret;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF, ret);
+ } else {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF, ret);
+ }
+
+ /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS , UVDU_PWR_STATUS are 0 (power on) */
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+ data &= ~0x103;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+ WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+}
+
+static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+ int ret;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+
+
+ data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+
+ data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDW_PWR_STATUS__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF, ret);
+ }
+}
+
/**
* vcn_v1_0_start - start VCN block
*
@@ -499,8 +580,9 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
vcn_v1_0_mc_resume(adev);
+ vcn_1_0_disable_static_power_gating(adev);
/* disable clock gating */
- vcn_v1_0_disable_clock_gating(adev, true);
+ vcn_v1_0_disable_clock_gating(adev);
/* disable interupt */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
@@ -680,16 +762,45 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- /* enable clock gating */
- vcn_v1_0_enable_clock_gating(adev, true);
+ WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
+ vcn_v1_0_enable_clock_gating(adev);
+ vcn_1_0_enable_static_power_gating(adev);
return 0;
}
+bool vcn_v1_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == 0x2);
+}
+
+int vcn_v1_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, 0x2, 0x2, ret);
+
+ return ret;
+}
+
static int vcn_v1_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
- /* needed for driver unload*/
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+
+ if (enable) {
+ /* wait for STATUS to clear */
+ if (vcn_v1_0_is_idle(handle))
+ return -EBUSY;
+ vcn_v1_0_enable_clock_gating(adev);
+ } else {
+ /* disable HW gating and enable Sw gating */
+ vcn_v1_0_disable_clock_gating(adev);
+ }
return 0;
}
@@ -1048,16 +1159,36 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static void vcn_v1_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
- int i;
struct amdgpu_device *adev = ring->adev;
+ int i;
- for (i = 0; i < count; i++)
- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
+ WARN_ON(ring->wptr % 2 || count % 2);
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
+ amdgpu_ring_write(ring, 0);
+ }
}
+static int vcn_v1_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the VCN block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == AMD_PG_STATE_GATE)
+ return vcn_v1_0_stop(adev);
+ else
+ return vcn_v1_0_start(adev);
+}
static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.name = "vcn_v1_0",
@@ -1069,20 +1200,19 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.hw_fini = vcn_v1_0_hw_fini,
.suspend = vcn_v1_0_suspend,
.resume = vcn_v1_0_resume,
- .is_idle = NULL /* vcn_v1_0_is_idle */,
- .wait_for_idle = NULL /* vcn_v1_0_wait_for_idle */,
+ .is_idle = vcn_v1_0_is_idle,
+ .wait_for_idle = vcn_v1_0_wait_for_idle,
.check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
.pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
.soft_reset = NULL /* vcn_v1_0_soft_reset */,
.post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
.set_clockgating_state = vcn_v1_0_set_clockgating_state,
- .set_powergating_state = NULL /* vcn_v1_0_set_powergating_state */,
+ .set_powergating_state = vcn_v1_0_set_powergating_state,
};
static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
- .nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
.vmhub = AMDGPU_MMHUB,
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
@@ -1101,7 +1231,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush,
.test_ring = amdgpu_vcn_dec_ring_test_ring,
.test_ib = amdgpu_vcn_dec_ring_test_ib,
- .insert_nop = vcn_v1_0_ring_insert_nop,
+ .insert_nop = vcn_v1_0_dec_ring_insert_nop,
.insert_start = vcn_v1_0_dec_ring_insert_start,
.insert_end = vcn_v1_0_dec_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
@@ -1109,6 +1239,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.end_use = amdgpu_vcn_ring_end_use,
.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
@@ -1139,6 +1270,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.end_use = amdgpu_vcn_ring_end_use,
.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
new file mode 100644
index 000000000000..52778de93ab0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "vega20_ip_offset.h"
+
+int vega20_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialized the blocke beend by our driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
+ adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ }
+ return 0;
+}
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 126f1276d347..4ac1288ab7df 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -305,9 +305,10 @@ static void vi_init_golden_registers(struct amdgpu_device *adev)
stoney_mgcg_cgcg_init,
ARRAY_SIZE(stoney_mgcg_cgcg_init));
break;
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
default:
break;
}
@@ -728,33 +729,59 @@ static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
return r;
tmp = RREG32_SMC(cntl_reg);
- tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
- CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
+
+ if (adev->flags & AMD_IS_APU)
+ tmp &= ~CG_DCLK_CNTL__DCLK_DIVIDER_MASK;
+ else
+ tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
+ CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
tmp |= dividers.post_divider;
WREG32_SMC(cntl_reg, tmp);
for (i = 0; i < 100; i++) {
- if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK)
- break;
+ tmp = RREG32_SMC(status_reg);
+ if (adev->flags & AMD_IS_APU) {
+ if (tmp & 0x10000)
+ break;
+ } else {
+ if (tmp & CG_DCLK_STATUS__DCLK_STATUS_MASK)
+ break;
+ }
mdelay(10);
}
if (i == 100)
return -ETIMEDOUT;
-
return 0;
}
+#define ixGNB_CLK1_DFS_CNTL 0xD82200F0
+#define ixGNB_CLK1_STATUS 0xD822010C
+#define ixGNB_CLK2_DFS_CNTL 0xD8220110
+#define ixGNB_CLK2_STATUS 0xD822012C
+#define ixGNB_CLK3_DFS_CNTL 0xD8220130
+#define ixGNB_CLK3_STATUS 0xD822014C
+
static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
{
int r;
- r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
- if (r)
- return r;
+ if (adev->flags & AMD_IS_APU) {
+ r = vi_set_uvd_clock(adev, vclk, ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS);
+ if (r)
+ return r;
- r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
- if (r)
- return r;
+ r = vi_set_uvd_clock(adev, dclk, ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS);
+ if (r)
+ return r;
+ } else {
+ r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
+ if (r)
+ return r;
+
+ r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
+ if (r)
+ return r;
+ }
return 0;
}
@@ -764,6 +791,22 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
int r, i;
struct atom_clock_dividers dividers;
u32 tmp;
+ u32 reg_ctrl;
+ u32 reg_status;
+ u32 status_mask;
+ u32 reg_mask;
+
+ if (adev->flags & AMD_IS_APU) {
+ reg_ctrl = ixGNB_CLK3_DFS_CNTL;
+ reg_status = ixGNB_CLK3_STATUS;
+ status_mask = 0x00010000;
+ reg_mask = CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
+ } else {
+ reg_ctrl = ixCG_ECLK_CNTL;
+ reg_status = ixCG_ECLK_STATUS;
+ status_mask = CG_ECLK_STATUS__ECLK_STATUS_MASK;
+ reg_mask = CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
+ }
r = amdgpu_atombios_get_clock_dividers(adev,
COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
@@ -772,24 +815,25 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
return r;
for (i = 0; i < 100; i++) {
- if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+ if (RREG32_SMC(reg_status) & status_mask)
break;
mdelay(10);
}
+
if (i == 100)
return -ETIMEDOUT;
- tmp = RREG32_SMC(ixCG_ECLK_CNTL);
- tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK |
- CG_ECLK_CNTL__ECLK_DIVIDER_MASK);
+ tmp = RREG32_SMC(reg_ctrl);
+ tmp &= ~reg_mask;
tmp |= dividers.post_divider;
- WREG32_SMC(ixCG_ECLK_CNTL, tmp);
+ WREG32_SMC(reg_ctrl, tmp);
for (i = 0; i < 100; i++) {
- if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+ if (RREG32_SMC(reg_status) & status_mask)
break;
mdelay(10);
}
+
if (i == 100)
return -ETIMEDOUT;
@@ -876,6 +920,27 @@ static void vi_invalidate_hdp(struct amdgpu_device *adev,
}
}
+static bool vi_need_full_reset(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ /* CZ has hang issues with full reset at the moment */
+ return false;
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ /* XXX: soft reset should work on fiji and tonga */
+ return true;
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_TOPAZ:
+ default:
+ /* change this when we support soft reset */
+ return true;
+ }
+}
+
static const struct amdgpu_asic_funcs vi_asic_funcs =
{
.read_disabled_bios = &vi_read_disabled_bios,
@@ -889,6 +954,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.get_config_memsize = &vi_get_config_memsize,
.flush_hdp = &vi_flush_hdp,
.invalidate_hdp = &vi_invalidate_hdp,
+ .need_full_reset = &vi_need_full_reset,
};
#define CZ_REV_BRISTOL(rev) \
@@ -1031,6 +1097,30 @@ static int vi_common_early_init(void *handle)
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x64;
break;
+ case CHIP_VEGAM:
+ adev->cg_flags = 0;
+ /*AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_DRM_LS |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_VCE_MGCG;*/
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x6E;
+ break;
case CHIP_CARRIZO:
adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
AMD_CG_SUPPORT_GFX_MGCG |
@@ -1422,6 +1512,7 @@ static int vi_common_set_clockgating_state(void *handle,
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
vi_common_set_clockgating_state_by_smu(adev, state);
default:
break;
@@ -1551,9 +1642,10 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block);
}
break;
- case CHIP_POLARIS11:
case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block);
amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 0d0242240c47..ffd096fffc1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -30,12 +30,14 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
kfd_process.o kfd_queue.o kfd_mqd_manager.o \
kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
+ kfd_mqd_manager_v9.o \
kfd_kernel_queue.o kfd_kernel_queue_cik.o \
- kfd_kernel_queue_vi.o kfd_packet_manager.o \
- kfd_process_queue_manager.o kfd_device_queue_manager.o \
- kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
+ kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
+ kfd_packet_manager.o kfd_process_queue_manager.o \
+ kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \
+ kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
- kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
+ kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
ifneq ($(CONFIG_AMD_IOMMU_V2),)
amdkfd-y += kfd_iommu.o
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 3d5ccb3755d4..49df6c791cfc 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -27,18 +27,28 @@
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const uint32_t *ih_ring_entry)
{
- unsigned int pasid;
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
+ unsigned int vmid, pasid;
+
+ /* Only handle interrupts from KFD VMIDs */
+ vmid = (ihre->ring_id & 0x0000ff00) >> 8;
+ if (vmid < dev->vm_info.first_vmid_kfd ||
+ vmid > dev->vm_info.last_vmid_kfd)
+ return 0;
+ /* If there is no valid PASID, it's likely a firmware bug */
pasid = (ihre->ring_id & 0xffff0000) >> 16;
+ if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
+ return 0;
- /* Do not process in ISR, just request it to be forwarded to WQ. */
- return (pasid != 0) &&
- (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
+ /* Interrupt types we care about: various signals and faults.
+ * They will be forwarded to a work queue (see below).
+ */
+ return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
- ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
+ ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE;
}
static void cik_event_interrupt_wq(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
index 48769d12dd7b..37ce6dd65391 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -33,7 +33,8 @@
#define APE1_MTYPE(x) ((x) << 7)
/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
-#define MTYPE_CACHED 0
+#define MTYPE_CACHED_NV 0
+#define MTYPE_CACHED 1
#define MTYPE_NONCACHED 3
#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
new file mode 100644
index 000000000000..f68aef02fc1f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -0,0 +1,560 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+static const uint32_t cwsr_trap_gfx8_hex[] = {
+ 0xbf820001, 0xbf820125,
+ 0xb8f4f802, 0x89748674,
+ 0xb8f5f803, 0x8675ff75,
+ 0x00000400, 0xbf850011,
+ 0xc00a1e37, 0x00000000,
+ 0xbf8c007f, 0x87777978,
+ 0xbf840002, 0xb974f802,
+ 0xbe801d78, 0xb8f5f803,
+ 0x8675ff75, 0x000001ff,
+ 0xbf850002, 0x80708470,
+ 0x82718071, 0x8671ff71,
+ 0x0000ffff, 0xb974f802,
+ 0xbe801f70, 0xb8f5f803,
+ 0x8675ff75, 0x00000100,
+ 0xbf840006, 0xbefa0080,
+ 0xb97a0203, 0x8671ff71,
+ 0x0000ffff, 0x80f08870,
+ 0x82f18071, 0xbefa0080,
+ 0xb97a0283, 0xbef60068,
+ 0xbef70069, 0xb8fa1c07,
+ 0x8e7a9c7a, 0x87717a71,
+ 0xb8fa03c7, 0x8e7a9b7a,
+ 0x87717a71, 0xb8faf807,
+ 0x867aff7a, 0x00007fff,
+ 0xb97af807, 0xbef2007e,
+ 0xbef3007f, 0xbefe0180,
+ 0xbf900004, 0x877a8474,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xbef8007e,
+ 0x8679ff7f, 0x0000ffff,
+ 0x8779ff79, 0x00040000,
+ 0xbefa0080, 0xbefb00ff,
+ 0x00807fac, 0x867aff7f,
+ 0x08000000, 0x8f7a837a,
+ 0x877b7a7b, 0x867aff7f,
+ 0x70000000, 0x8f7a817a,
+ 0x877b7a7b, 0xbeef007c,
+ 0xbeee0080, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x806e7a6e,
+ 0xbefa0084, 0xbefa00ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc006e, 0xc0611bfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611c3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611c7c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611cbc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611cfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611d3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xb8f5f803,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611d7c, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611dbc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611dfc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xb8eff801, 0xbefe007c,
+ 0xbefc006e, 0xc0611bfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611b3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611b7c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0x867aff7f,
+ 0x04000000, 0xbef30080,
+ 0x8773737a, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8f51605, 0x80758175,
+ 0x8e758475, 0x8e7a8275,
+ 0xbefa00ff, 0x01000000,
+ 0xbef60178, 0x80786e78,
+ 0x82798079, 0xbefc0080,
+ 0xbe802b00, 0xbe822b02,
+ 0xbe842b04, 0xbe862b06,
+ 0xbe882b08, 0xbe8a2b0a,
+ 0xbe8c2b0c, 0xbe8e2b0e,
+ 0xc06b003c, 0x00000000,
+ 0xc06b013c, 0x00000010,
+ 0xc06b023c, 0x00000020,
+ 0xc06b033c, 0x00000030,
+ 0x8078c078, 0x82798079,
+ 0x807c907c, 0xbf0a757c,
+ 0xbf85ffeb, 0xbef80176,
+ 0xbeee0080, 0xbefe00c1,
+ 0xbeff00c1, 0xbefa00ff,
+ 0x01000000, 0xe0724000,
+ 0x6e1e0000, 0xe0724100,
+ 0x6e1e0100, 0xe0724200,
+ 0x6e1e0200, 0xe0724300,
+ 0x6e1e0300, 0xbefe00c1,
+ 0xbeff00c1, 0xb8f54306,
+ 0x8675c175, 0xbf84002c,
+ 0xbf8a0000, 0x867aff73,
+ 0x04000000, 0xbf840028,
+ 0x8e758675, 0x8e758275,
+ 0xbefa0075, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x806e7a6e,
+ 0x806eff6e, 0x00000080,
+ 0xbefa00ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe80007b,
+ 0x867bff7b, 0xff7fffff,
+ 0x877bff7b, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8c007f, 0xe0765000,
+ 0x6e1e0002, 0x32040702,
+ 0xd0c9006a, 0x0000eb02,
+ 0xbf87fff7, 0xbefb0000,
+ 0xbeee00ff, 0x00000400,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8f52a05, 0x80758175,
+ 0x8e758275, 0x8e7a8875,
+ 0xbefa00ff, 0x01000000,
+ 0xbefc0084, 0xbf0a757c,
+ 0xbf840015, 0xbf11017c,
+ 0x8075ff75, 0x00001000,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x6e1e0000,
+ 0xe0724100, 0x6e1e0100,
+ 0xe0724200, 0x6e1e0200,
+ 0xe0724300, 0x6e1e0300,
+ 0x807c847c, 0x806eff6e,
+ 0x00000400, 0xbf0a757c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbf8200ca, 0xbef8007e,
+ 0x8679ff7f, 0x0000ffff,
+ 0x8779ff79, 0x00040000,
+ 0xbefa0080, 0xbefb00ff,
+ 0x00807fac, 0x8676ff7f,
+ 0x08000000, 0x8f768376,
+ 0x877b767b, 0x8676ff7f,
+ 0x70000000, 0x8f768176,
+ 0x877b767b, 0x8676ff7f,
+ 0x04000000, 0xbf84001e,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8f34306, 0x8673c173,
+ 0xbf840019, 0x8e738673,
+ 0x8e738273, 0xbefa0073,
+ 0xb8f22a05, 0x80728172,
+ 0x8e728a72, 0xb8f61605,
+ 0x80768176, 0x8e768676,
+ 0x80727672, 0x8072ff72,
+ 0x00000080, 0xbefa00ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x721e0000,
+ 0xe0510100, 0x721e0000,
+ 0x807cff7c, 0x00000200,
+ 0x8072ff72, 0x00000200,
+ 0xbf0a737c, 0xbf85fff6,
+ 0xbef20080, 0xbefe00c1,
+ 0xbeff00c1, 0xb8f32a05,
+ 0x80738173, 0x8e738273,
+ 0x8e7a8873, 0xbefa00ff,
+ 0x01000000, 0xbef60072,
+ 0x8072ff72, 0x00000400,
+ 0xbefc0084, 0xbf11087c,
+ 0x8073ff73, 0x00008000,
+ 0xe0524000, 0x721e0000,
+ 0xe0524100, 0x721e0100,
+ 0xe0524200, 0x721e0200,
+ 0xe0524300, 0x721e0300,
+ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0x807c847c,
+ 0x8072ff72, 0x00000400,
+ 0xbf0a737c, 0xbf85ffee,
+ 0xbf9c0000, 0xe0524000,
+ 0x761e0000, 0xe0524100,
+ 0x761e0100, 0xe0524200,
+ 0x761e0200, 0xe0524300,
+ 0x761e0300, 0xb8f22a05,
+ 0x80728172, 0x8e728a72,
+ 0xb8f61605, 0x80768176,
+ 0x8e768676, 0x80727672,
+ 0x80f2c072, 0xb8f31605,
+ 0x80738173, 0x8e738473,
+ 0x8e7a8273, 0xbefa00ff,
+ 0x01000000, 0xbefc0073,
+ 0xc031003c, 0x00000072,
+ 0x80f2c072, 0xbf8c007f,
+ 0x80fc907c, 0xbe802d00,
+ 0xbe822d02, 0xbe842d04,
+ 0xbe862d06, 0xbe882d08,
+ 0xbe8a2d0a, 0xbe8c2d0c,
+ 0xbe8e2d0e, 0xbf06807c,
+ 0xbf84fff1, 0xb8f22a05,
+ 0x80728172, 0x8e728a72,
+ 0xb8f61605, 0x80768176,
+ 0x8e768676, 0x80727672,
+ 0xbefa0084, 0xbefa00ff,
+ 0x01000000, 0xc0211cfc,
+ 0x00000072, 0x80728472,
+ 0xc0211c3c, 0x00000072,
+ 0x80728472, 0xc0211c7c,
+ 0x00000072, 0x80728472,
+ 0xc0211bbc, 0x00000072,
+ 0x80728472, 0xc0211bfc,
+ 0x00000072, 0x80728472,
+ 0xc0211d3c, 0x00000072,
+ 0x80728472, 0xc0211d7c,
+ 0x00000072, 0x80728472,
+ 0xc0211a3c, 0x00000072,
+ 0x80728472, 0xc0211a7c,
+ 0x00000072, 0x80728472,
+ 0xc0211dfc, 0x00000072,
+ 0x80728472, 0xc0211b3c,
+ 0x00000072, 0x80728472,
+ 0xc0211b7c, 0x00000072,
+ 0x80728472, 0xbf8c007f,
+ 0xbefc0073, 0xbefe006e,
+ 0xbeff006f, 0x867375ff,
+ 0x000003ff, 0xb9734803,
+ 0x867375ff, 0xfffff800,
+ 0x8f738b73, 0xb973a2c3,
+ 0xb977f801, 0x8673ff71,
+ 0xf0000000, 0x8f739c73,
+ 0x8e739073, 0xbef60080,
+ 0x87767376, 0x8673ff71,
+ 0x08000000, 0x8f739b73,
+ 0x8e738f73, 0x87767376,
+ 0x8673ff74, 0x00800000,
+ 0x8f739773, 0xb976f807,
+ 0x8671ff71, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0xb974f802, 0xbf8a0000,
+ 0x95807370, 0xbf810000,
+};
+
+
+static const uint32_t cwsr_trap_gfx9_hex[] = {
+ 0xbf820001, 0xbf82015a,
+ 0xb8f8f802, 0x89788678,
+ 0xb8f1f803, 0x866eff71,
+ 0x00000400, 0xbf850034,
+ 0x866eff71, 0x00000800,
+ 0xbf850003, 0x866eff71,
+ 0x00000100, 0xbf840008,
+ 0x866eff78, 0x00002000,
+ 0xbf840001, 0xbf810000,
+ 0x8778ff78, 0x00002000,
+ 0x80ec886c, 0x82ed806d,
+ 0xb8eef807, 0x866fff6e,
+ 0x001f8000, 0x8e6f8b6f,
+ 0x8977ff77, 0xfc000000,
+ 0x87776f77, 0x896eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0xb8f0f812, 0xb8f1f813,
+ 0x8ef08870, 0xc0071bb8,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071c38, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0xb8f1f803, 0x8671ff71,
+ 0x000001ff, 0xbf850002,
+ 0x806c846c, 0x826d806d,
+ 0x866dff6d, 0x0000ffff,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0xb978f802, 0xbe801f6c,
+ 0x866dff6d, 0x0000ffff,
+ 0xbef00080, 0xb9700283,
+ 0xb8f02407, 0x8e709c70,
+ 0x876d706d, 0xb8f003c7,
+ 0x8e709b70, 0x876d706d,
+ 0xb8f0f807, 0x8670ff70,
+ 0x00007fff, 0xb970f807,
+ 0xbeee007e, 0xbeef007f,
+ 0xbefe0180, 0xbf900004,
+ 0x87708478, 0xb970f802,
+ 0xbf8e0002, 0xbf88fffe,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0xb8f11605,
+ 0x80718171, 0x8e718671,
+ 0x80707170, 0x80707e70,
+ 0x8271807f, 0x8671ff71,
+ 0x0000ffff, 0xc0471cb8,
+ 0x00000040, 0xbf8cc07f,
+ 0xc04b1d38, 0x00000048,
+ 0xbf8cc07f, 0xc0431e78,
+ 0x00000058, 0xbf8cc07f,
+ 0xc0471eb8, 0x0000005c,
+ 0xbf8cc07f, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x8670ff7f,
+ 0x08000000, 0x8f708370,
+ 0x87777077, 0x8670ff7f,
+ 0x70000000, 0x8f708170,
+ 0x87777077, 0xbefb007c,
+ 0xbefa0080, 0xb8fa2a05,
+ 0x807a817a, 0x8e7a8a7a,
+ 0xb8f01605, 0x80708170,
+ 0x8e708670, 0x807a707a,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc007a, 0xc0611efa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611b3a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc007a, 0xc0611b7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611bba, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc007a, 0xc0611bfa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xb8f1f803,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc007a, 0xc0611a3a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xb8fbf801,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611efa, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0x8670ff7f,
+ 0x04000000, 0xbeef0080,
+ 0x876f6f70, 0xb8fa2a05,
+ 0x807a817a, 0x8e7a8a7a,
+ 0xb8f11605, 0x80718171,
+ 0x8e718471, 0x8e768271,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747a74,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a717c, 0xbf85ffe7,
+ 0xbef40172, 0xbefa0080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0xe0724000, 0x7a1d0000,
+ 0xe0724100, 0x7a1d0100,
+ 0xe0724200, 0x7a1d0200,
+ 0xe0724300, 0x7a1d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8f14306, 0x8671c171,
+ 0xbf84002c, 0xbf8a0000,
+ 0x8670ff6f, 0x04000000,
+ 0xbf840028, 0x8e718671,
+ 0x8e718271, 0xbef60071,
+ 0xb8fa2a05, 0x807a817a,
+ 0x8e7a8a7a, 0xb8f01605,
+ 0x80708170, 0x8e708670,
+ 0x807a707a, 0x807aff7a,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xd28c0002, 0x000100c1,
+ 0xd28d0003, 0x000204c1,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x7a1d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000e302, 0xbf87fff7,
+ 0xbef70000, 0xbefa00ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8f12a05,
+ 0x80718171, 0x8e718271,
+ 0x8e768871, 0xbef600ff,
+ 0x01000000, 0xbefc0084,
+ 0xbf0a717c, 0xbf840015,
+ 0xbf11017c, 0x8071ff71,
+ 0x00001000, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0xe0724000,
+ 0x7a1d0000, 0xe0724100,
+ 0x7a1d0100, 0xe0724200,
+ 0x7a1d0200, 0xe0724300,
+ 0x7a1d0300, 0x807c847c,
+ 0x807aff7a, 0x00000400,
+ 0xbf0a717c, 0xbf85ffef,
+ 0xbf9c0000, 0xbf8200d9,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x08000000,
+ 0x8f6e836e, 0x87776e77,
+ 0x866eff7f, 0x70000000,
+ 0x8f6e816e, 0x87776e77,
+ 0x866eff7f, 0x04000000,
+ 0xbf84001e, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf840019,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82a05,
+ 0x80788178, 0x8e788a78,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x8078ff78, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xe0510000,
+ 0x781d0000, 0xe0510100,
+ 0x781d0000, 0x807cff7c,
+ 0x00000200, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85fff6, 0xbef80080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8ef2a05, 0x806f816f,
+ 0x8e6f826f, 0x8e76886f,
+ 0xbef600ff, 0x01000000,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0x806fff6f,
+ 0x00008000, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xbf9c0000,
+ 0xe0524000, 0x6e1d0000,
+ 0xe0524100, 0x6e1d0100,
+ 0xe0524200, 0x6e1d0200,
+ 0xe0524300, 0x6e1d0300,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x80f8c078,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f846f, 0x8e76826f,
+ 0xbef600ff, 0x01000000,
+ 0xbefc006f, 0xc031003a,
+ 0x00000078, 0x80f8c078,
+ 0xbf8cc07f, 0x80fc907c,
+ 0xbf800000, 0xbe802d00,
+ 0xbe822d02, 0xbe842d04,
+ 0xbe862d06, 0xbe882d08,
+ 0xbe8a2d0a, 0xbe8c2d0c,
+ 0xbe8e2d0e, 0xbf06807c,
+ 0xbf84fff0, 0xb8f82a05,
+ 0x80788178, 0x8e788a78,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
+ 0x00000078, 0x80788478,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
+ 0x00000078, 0x80788478,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
+ 0x00000078, 0x80788478,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
+ 0x00000078, 0x80788478,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
+ 0x00000078, 0x80788478,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe007a,
+ 0xbeff007b, 0x866f71ff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f71ff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f866f, 0x806e6f6e,
+ 0x806e746e, 0x826f8075,
+ 0x866fff6f, 0x0000ffff,
+ 0xc0071cb7, 0x00000040,
+ 0xc00b1d37, 0x00000048,
+ 0xc0031e77, 0x00000058,
+ 0xc0071eb7, 0x0000005c,
+ 0xbf8cc07f, 0x866fff6d,
+ 0xf0000000, 0x8f6f9c6f,
+ 0x8e6f906f, 0xbeee0080,
+ 0x876e6f6e, 0x866fff6d,
+ 0x08000000, 0x8f6f9b6f,
+ 0x8e6f8f6f, 0x876e6f6e,
+ 0x866fff70, 0x00800000,
+ 0x8f6f976f, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0xb970f802, 0xbf8a0000,
+ 0x95806f6c, 0xbf810000,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
index 997a383dcb8b..a2a04bb64096 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -20,9 +20,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#if 0
-HW (VI) source code for CWSR trap handler
-#Version 18 + multiple trap handler
+/* To compile this assembly code:
+ * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex
+ */
+
+/* HW (VI) source code for CWSR trap handler */
+/* Version 18 + multiple trap handler */
// this performance-optimal version was originally from Seven Xu at SRDC
@@ -98,6 +101,7 @@ var SWIZZLE_EN = 0 //whether we use swi
/**************************************************************************/
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
@@ -149,7 +153,7 @@ var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi
//tba_lo and tba_hi need to be saved/restored
-var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
var s_save_pc_hi = ttmp1
var s_save_exec_lo = ttmp2
var s_save_exec_hi = ttmp3
@@ -319,6 +323,10 @@ end
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
end
+ // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
+ s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
L_SLEEP:
s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0
@@ -1007,8 +1015,6 @@ end
s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
- s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
-
//for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
@@ -1044,6 +1050,7 @@ end
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
@@ -1127,258 +1134,3 @@ end
function get_hwreg_size_bytes
return 128 //HWREG size 128 bytes
end
-
-
-#endif
-
-static const uint32_t cwsr_trap_gfx8_hex[] = {
- 0xbf820001, 0xbf820123,
- 0xb8f4f802, 0x89748674,
- 0xb8f5f803, 0x8675ff75,
- 0x00000400, 0xbf850011,
- 0xc00a1e37, 0x00000000,
- 0xbf8c007f, 0x87777978,
- 0xbf840002, 0xb974f802,
- 0xbe801d78, 0xb8f5f803,
- 0x8675ff75, 0x000001ff,
- 0xbf850002, 0x80708470,
- 0x82718071, 0x8671ff71,
- 0x0000ffff, 0xb974f802,
- 0xbe801f70, 0xb8f5f803,
- 0x8675ff75, 0x00000100,
- 0xbf840006, 0xbefa0080,
- 0xb97a0203, 0x8671ff71,
- 0x0000ffff, 0x80f08870,
- 0x82f18071, 0xbefa0080,
- 0xb97a0283, 0xbef60068,
- 0xbef70069, 0xb8fa1c07,
- 0x8e7a9c7a, 0x87717a71,
- 0xb8fa03c7, 0x8e7a9b7a,
- 0x87717a71, 0xb8faf807,
- 0x867aff7a, 0x00007fff,
- 0xb97af807, 0xbef2007e,
- 0xbef3007f, 0xbefe0180,
- 0xbf900004, 0xbf8e0002,
- 0xbf88fffe, 0xbef8007e,
- 0x8679ff7f, 0x0000ffff,
- 0x8779ff79, 0x00040000,
- 0xbefa0080, 0xbefb00ff,
- 0x00807fac, 0x867aff7f,
- 0x08000000, 0x8f7a837a,
- 0x877b7a7b, 0x867aff7f,
- 0x70000000, 0x8f7a817a,
- 0x877b7a7b, 0xbeef007c,
- 0xbeee0080, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8fa1605, 0x807a817a,
- 0x8e7a867a, 0x806e7a6e,
- 0xbefa0084, 0xbefa00ff,
- 0x01000000, 0xbefe007c,
- 0xbefc006e, 0xc0611bfc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611c3c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611c7c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611cbc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611cfc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611d3c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xb8f5f803,
- 0xbefe007c, 0xbefc006e,
- 0xc0611d7c, 0x0000007c,
- 0x806e846e, 0xbefc007e,
- 0xbefe007c, 0xbefc006e,
- 0xc0611dbc, 0x0000007c,
- 0x806e846e, 0xbefc007e,
- 0xbefe007c, 0xbefc006e,
- 0xc0611dfc, 0x0000007c,
- 0x806e846e, 0xbefc007e,
- 0xb8eff801, 0xbefe007c,
- 0xbefc006e, 0xc0611bfc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611b3c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611b7c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbef30080,
- 0x8773737a, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8f51605, 0x80758175,
- 0x8e758475, 0x8e7a8275,
- 0xbefa00ff, 0x01000000,
- 0xbef60178, 0x80786e78,
- 0x82798079, 0xbefc0080,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003c, 0x00000000,
- 0xc06b013c, 0x00000010,
- 0xc06b023c, 0x00000020,
- 0xc06b033c, 0x00000030,
- 0x8078c078, 0x82798079,
- 0x807c907c, 0xbf0a757c,
- 0xbf85ffeb, 0xbef80176,
- 0xbeee0080, 0xbefe00c1,
- 0xbeff00c1, 0xbefa00ff,
- 0x01000000, 0xe0724000,
- 0x6e1e0000, 0xe0724100,
- 0x6e1e0100, 0xe0724200,
- 0x6e1e0200, 0xe0724300,
- 0x6e1e0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8f54306,
- 0x8675c175, 0xbf84002c,
- 0xbf8a0000, 0x867aff73,
- 0x04000000, 0xbf840028,
- 0x8e758675, 0x8e758275,
- 0xbefa0075, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8fa1605, 0x807a817a,
- 0x8e7a867a, 0x806e7a6e,
- 0x806eff6e, 0x00000080,
- 0xbefa00ff, 0x01000000,
- 0xbefc0080, 0xd28c0002,
- 0x000100c1, 0xd28d0003,
- 0x000204c1, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe80007b,
- 0x867bff7b, 0xff7fffff,
- 0x877bff7b, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8c007f, 0xe0765000,
- 0x6e1e0002, 0x32040702,
- 0xd0c9006a, 0x0000eb02,
- 0xbf87fff7, 0xbefb0000,
- 0xbeee00ff, 0x00000400,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8f52a05, 0x80758175,
- 0x8e758275, 0x8e7a8875,
- 0xbefa00ff, 0x01000000,
- 0xbefc0084, 0xbf0a757c,
- 0xbf840015, 0xbf11017c,
- 0x8075ff75, 0x00001000,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
- 0xe0724000, 0x6e1e0000,
- 0xe0724100, 0x6e1e0100,
- 0xe0724200, 0x6e1e0200,
- 0xe0724300, 0x6e1e0300,
- 0x807c847c, 0x806eff6e,
- 0x00000400, 0xbf0a757c,
- 0xbf85ffef, 0xbf9c0000,
- 0xbf8200ca, 0xbef8007e,
- 0x8679ff7f, 0x0000ffff,
- 0x8779ff79, 0x00040000,
- 0xbefa0080, 0xbefb00ff,
- 0x00807fac, 0x8676ff7f,
- 0x08000000, 0x8f768376,
- 0x877b767b, 0x8676ff7f,
- 0x70000000, 0x8f768176,
- 0x877b767b, 0x8676ff7f,
- 0x04000000, 0xbf84001e,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8f34306, 0x8673c173,
- 0xbf840019, 0x8e738673,
- 0x8e738273, 0xbefa0073,
- 0xb8f22a05, 0x80728172,
- 0x8e728a72, 0xb8f61605,
- 0x80768176, 0x8e768676,
- 0x80727672, 0x8072ff72,
- 0x00000080, 0xbefa00ff,
- 0x01000000, 0xbefc0080,
- 0xe0510000, 0x721e0000,
- 0xe0510100, 0x721e0000,
- 0x807cff7c, 0x00000200,
- 0x8072ff72, 0x00000200,
- 0xbf0a737c, 0xbf85fff6,
- 0xbef20080, 0xbefe00c1,
- 0xbeff00c1, 0xb8f32a05,
- 0x80738173, 0x8e738273,
- 0x8e7a8873, 0xbefa00ff,
- 0x01000000, 0xbef60072,
- 0x8072ff72, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0x8073ff73, 0x00008000,
- 0xe0524000, 0x721e0000,
- 0xe0524100, 0x721e0100,
- 0xe0524200, 0x721e0200,
- 0xe0524300, 0x721e0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8072ff72, 0x00000400,
- 0xbf0a737c, 0xbf85ffee,
- 0xbf9c0000, 0xe0524000,
- 0x761e0000, 0xe0524100,
- 0x761e0100, 0xe0524200,
- 0x761e0200, 0xe0524300,
- 0x761e0300, 0xb8f22a05,
- 0x80728172, 0x8e728a72,
- 0xb8f61605, 0x80768176,
- 0x8e768676, 0x80727672,
- 0x80f2c072, 0xb8f31605,
- 0x80738173, 0x8e738473,
- 0x8e7a8273, 0xbefa00ff,
- 0x01000000, 0xbefc0073,
- 0xc031003c, 0x00000072,
- 0x80f2c072, 0xbf8c007f,
- 0x80fc907c, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff1, 0xb8f22a05,
- 0x80728172, 0x8e728a72,
- 0xb8f61605, 0x80768176,
- 0x8e768676, 0x80727672,
- 0xbefa0084, 0xbefa00ff,
- 0x01000000, 0xc0211cfc,
- 0x00000072, 0x80728472,
- 0xc0211c3c, 0x00000072,
- 0x80728472, 0xc0211c7c,
- 0x00000072, 0x80728472,
- 0xc0211bbc, 0x00000072,
- 0x80728472, 0xc0211bfc,
- 0x00000072, 0x80728472,
- 0xc0211d3c, 0x00000072,
- 0x80728472, 0xc0211d7c,
- 0x00000072, 0x80728472,
- 0xc0211a3c, 0x00000072,
- 0x80728472, 0xc0211a7c,
- 0x00000072, 0x80728472,
- 0xc0211dfc, 0x00000072,
- 0x80728472, 0xc0211b3c,
- 0x00000072, 0x80728472,
- 0xc0211b7c, 0x00000072,
- 0x80728472, 0xbf8c007f,
- 0x8671ff71, 0x0000ffff,
- 0xbefc0073, 0xbefe006e,
- 0xbeff006f, 0x867375ff,
- 0x000003ff, 0xb9734803,
- 0x867375ff, 0xfffff800,
- 0x8f738b73, 0xb973a2c3,
- 0xb977f801, 0x8673ff71,
- 0xf0000000, 0x8f739c73,
- 0x8e739073, 0xbef60080,
- 0x87767376, 0x8673ff71,
- 0x08000000, 0x8f739b73,
- 0x8e738f73, 0x87767376,
- 0x8673ff74, 0x00800000,
- 0x8f739773, 0xb976f807,
- 0x86fe7e7e, 0x86ea6a6a,
- 0xb974f802, 0xbf8a0000,
- 0x95807370, 0xbf810000,
-};
-
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
new file mode 100644
index 000000000000..998be96be736
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -0,0 +1,1214 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex
+ */
+
+/* HW (GFX9) source code for CWSR trap handler */
+/* Version 18 + multiple trap handler */
+
+// this performance-optimal version was originally from Seven Xu at SRDC
+
+// Revison #18 --...
+/* Rev History
+** #1. Branch from gc dv. //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV)
+** #4. SR Memory Layout:
+** 1. VGPR-SGPR-HWREG-{LDS}
+** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern..
+** #5. Update: 1. Accurate g8sr_ts_save_d timestamp
+** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation)
+** #7. Update: 1. don't barrier if noLDS
+** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version
+** 2. Fix SQ issue by s_sleep 2
+** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last
+** 2. optimize s_buffer save by burst 16sgprs...
+** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs.
+** #11. Update 1. Add 2 more timestamp for debug version
+** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance
+** #13. Integ 1. Always use MUBUF for PV trap shader...
+** #14. Update 1. s_buffer_store soft clause...
+** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot.
+** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree
+** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part]
+** 2. PERF - Save LDS before save VGPR to cover LDS save long latency...
+** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32
+** 2. FUNC - Handle non-CWSR traps
+*/
+
+var G8SR_WDMEM_HWREG_OFFSET = 0
+var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes
+
+// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore.
+
+var G8SR_DEBUG_TIMESTAMP = 0
+var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset
+var s_g8sr_ts_save_s = s[34:35] // save start
+var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi
+var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ
+var s_g8sr_ts_save_d = s[40:41] // save end
+var s_g8sr_ts_restore_s = s[42:43] // restore start
+var s_g8sr_ts_restore_d = s[44:45] // restore end
+
+var G8SR_VGPR_SR_IN_DWX4 = 0
+var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes
+var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4
+
+
+/*************************************************************************/
+/* control on how to run the shader */
+/*************************************************************************/
+//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run)
+var EMU_RUN_HACK = 0
+var EMU_RUN_HACK_RESTORE_NORMAL = 0
+var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0
+var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0
+var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
+var SAVE_LDS = 1
+var WG_BASE_ADDR_LO = 0x9000a000
+var WG_BASE_ADDR_HI = 0x0
+var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem
+var CTX_SAVE_CONTROL = 0x0
+var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL
+var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run)
+var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write
+var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes
+var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
+var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
+
+/**************************************************************************/
+/* variables */
+/**************************************************************************/
+var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
+var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
+var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
+var SQ_WAVE_STATUS_HALT_MASK = 0x2000
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
+
+var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
+var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask
+var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
+var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
+var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
+
+var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME
+
+var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
+var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
+
+var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data
+var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000
+
+/* Save */
+var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes
+var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+
+var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
+var S_SAVE_SPI_INIT_ATC_SHIFT = 27
+var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
+var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
+var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
+
+var s_save_spi_init_lo = exec_lo
+var s_save_spi_init_hi = exec_hi
+
+var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_hi = ttmp1
+var s_save_exec_lo = ttmp2
+var s_save_exec_hi = ttmp3
+var s_save_tmp = ttmp4
+var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine
+var s_save_xnack_mask_lo = ttmp6
+var s_save_xnack_mask_hi = ttmp7
+var s_save_buf_rsrc0 = ttmp8
+var s_save_buf_rsrc1 = ttmp9
+var s_save_buf_rsrc2 = ttmp10
+var s_save_buf_rsrc3 = ttmp11
+var s_save_status = ttmp12
+var s_save_mem_offset = ttmp14
+var s_save_alloc_size = s_save_trapsts //conflict
+var s_save_m0 = ttmp15
+var s_save_ttmps_lo = s_save_tmp //no conflict
+var s_save_ttmps_hi = s_save_trapsts //no conflict
+
+/* Restore */
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
+var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
+var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
+var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
+var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
+var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
+
+var s_restore_spi_init_lo = exec_lo
+var s_restore_spi_init_hi = exec_hi
+
+var s_restore_mem_offset = ttmp12
+var s_restore_alloc_size = ttmp3
+var s_restore_tmp = ttmp2
+var s_restore_mem_offset_save = s_restore_tmp //no conflict
+
+var s_restore_m0 = s_restore_alloc_size //no conflict
+
+var s_restore_mode = ttmp7
+
+var s_restore_pc_lo = ttmp0
+var s_restore_pc_hi = ttmp1
+var s_restore_exec_lo = ttmp14
+var s_restore_exec_hi = ttmp15
+var s_restore_status = ttmp4
+var s_restore_trapsts = ttmp5
+var s_restore_xnack_mask_lo = xnack_mask_lo
+var s_restore_xnack_mask_hi = xnack_mask_hi
+var s_restore_buf_rsrc0 = ttmp8
+var s_restore_buf_rsrc1 = ttmp9
+var s_restore_buf_rsrc2 = ttmp10
+var s_restore_buf_rsrc3 = ttmp11
+var s_restore_ttmps_lo = s_restore_tmp //no conflict
+var s_restore_ttmps_hi = s_restore_alloc_size //no conflict
+
+/**************************************************************************/
+/* trap handler entry points */
+/**************************************************************************/
+/* Shader Main*/
+
+shader main
+ asic(GFX9)
+ type(CS)
+
+
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore
+ //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
+ s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC
+ s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f.
+ s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE
+ //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE
+ s_branch L_SKIP_RESTORE //NOT restore, SAVE actually
+ else
+ s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
+ end
+
+L_JUMP_TO_RESTORE:
+ s_branch L_RESTORE //restore
+
+L_SKIP_RESTORE:
+
+ s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
+ s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
+ s_cbranch_scc1 L_SAVE //this is the operation for save
+
+ // ********* Handle non-CWSR traps *******************
+if (!EMU_RUN_HACK)
+ // Illegal instruction is a non-maskable exception which blocks context save.
+ // Halt the wavefront and return from the trap.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+ s_cbranch_scc1 L_HALT_WAVE
+
+ // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA.
+ // Instead, halt the wavefront and return from the trap.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
+ s_cbranch_scc0 L_FETCH_2ND_TRAP
+
+L_HALT_WAVE:
+ // If STATUS.HALT is set then this fault must come from SQC instruction fetch.
+ // We cannot prevent further faults so just terminate the wavefront.
+ s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_cbranch_scc0 L_NOT_ALREADY_HALTED
+ s_endpgm
+L_NOT_ALREADY_HALTED:
+ s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+ // Rewind the PC to prevent this from occurring. The debugger compensates for this.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+L_FETCH_2ND_TRAP:
+ // Preserve and clear scalar XNACK state before issuing scalar reads.
+ // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
+ s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS)
+ s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp3
+
+ s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+
+ // Read second-level TBA/TMA from first-level TMA and jump if available.
+ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+ // ttmp12 holds SQ_WAVE_STATUS
+ s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO)
+ s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI)
+ s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA
+ s_waitcnt lgkmcnt(0)
+ s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA
+ s_waitcnt lgkmcnt(0)
+ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+ s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
+ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
+ s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
+ s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
+ s_addc_u32 ttmp1, ttmp1, 0
+L_EXCP_CASE:
+ s_and_b32 ttmp1, ttmp1, 0xFFFF
+
+ // Restore SQ_WAVE_IB_STS.
+ s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+
+ // Restore SQ_WAVE_STATUS.
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status
+
+ s_rfe_b64 [ttmp0, ttmp1]
+end
+ // ********* End handling of non-CWSR traps *******************
+
+/**************************************************************************/
+/* save routine */
+/**************************************************************************/
+
+L_SAVE:
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_save_s
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+end
+
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+
+ s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT
+ s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY
+ s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS
+ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
+
+ s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
+
+ /* inform SPI the readiness and wait for SPI's go signal */
+ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
+ s_mov_b32 s_save_exec_hi, exec_hi
+ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_sq_save_msg
+ s_waitcnt lgkmcnt(0)
+end
+
+ if (EMU_RUN_HACK)
+
+ else
+ s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
+ end
+
+ // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
+ s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
+ L_SLEEP:
+ s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0
+
+ if (EMU_RUN_HACK)
+
+ else
+ s_cbranch_execz L_SLEEP
+ end
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_spi_wrexec
+ s_waitcnt lgkmcnt(0)
+end
+
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+ //calculate wd_addr using absolute thread id
+ v_readlane_b32 s_save_tmp, v9, 0
+ s_lshr_b32 s_save_tmp, s_save_tmp, 6
+ s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE
+ s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+ else
+ end
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+ s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+ else
+ end
+
+ // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_save_ttmps_lo)
+ get_sgpr_size_bytes(s_save_ttmps_hi)
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+ s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
+ s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
+ s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
+ ack_sqc_store_workaround()
+ s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
+ ack_sqc_store_workaround()
+ s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
+ ack_sqc_store_workaround()
+ s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
+ ack_sqc_store_workaround()
+
+ /* setup Resource Contants */
+ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
+ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
+ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
+ s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
+ s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
+
+ //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?)
+ s_mov_b32 s_save_m0, m0 //save M0
+
+ /* global mem offset */
+ s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0
+
+
+
+
+ /* save HW registers */
+ //////////////////////////////
+
+ L_SAVE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ get_sgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+
+
+ s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0
+
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
+ s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
+ s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
+ end
+
+ write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC
+ write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC
+ write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) //STATUS
+
+ //s_save_trapsts conflicts with s_save_alloc_size
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS
+
+ write_hwreg_to_mem(xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO
+ write_hwreg_to_mem(xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI
+
+ //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
+
+
+ /* the first wave in the threadgroup */
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit
+ s_mov_b32 s_save_exec_hi, 0x0
+ s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26]
+
+
+ /* save SGPRs */
+ // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
+ //////////////////////////////
+
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
+
+ if (SGPR_SAVE_USE_SQC)
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes
+ else
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
+ end
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
+ //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
+ s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
+ s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
+ s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ s_nop 0x0 //Manually inserted wait states
+ L_SAVE_SGPR_LOOP:
+ // SGPR is allocated in 16 SGPR granularity
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
+ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
+
+ write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4
+ s_add_u32 m0, m0, 16 //next sgpr index
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete?
+ // restore s_save_buf_rsrc0,1
+ //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo
+ s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo
+
+
+
+
+ /* save first 4 VGPR, then LDS save could use */
+ // each wave will alloc 4 vgprs at least...
+ /////////////////////////////////////////////////////////////////////////////////////
+
+ s_mov_b32 s_save_mem_offset, 0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ s_mov_b32 xnack_mask_lo, 0x0
+ s_mov_b32 xnack_mask_hi, 0x0
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
+else
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+end
+
+
+
+ /* save LDS */
+ //////////////////////////////
+
+ L_SAVE_LDS:
+
+ // Change EXEC to all threads...
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
+
+ s_barrier //LDS is used? wait for other waves in the same TG
+ s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here
+ s_cbranch_scc0 L_SAVE_LDS_DONE
+
+ // first wave do LDS save;
+
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes
+ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_save_mem_offset)
+ get_sgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ s_mov_b32 m0, 0x0 //lds_offset initial value = 0
+
+
+var LDS_DMA_ENABLE = 0
+var UNROLL = 0
+if UNROLL==0 && LDS_DMA_ENABLE==1
+ s_mov_b32 s3, 256*2
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ L_SAVE_LDS_LOOP:
+ //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.???
+ if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
+ end
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete?
+
+elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss
+ // store from higest LDS address to lowest
+ s_mov_b32 s3, 256*2
+ s_sub_u32 m0, s_save_alloc_size, s3
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, m0
+ s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128 trunks...
+ s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from higheset addr to lowest
+ s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block cost 6*4 Bytes instruction
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2is the below 2 inst...//s_addc and s_setpc
+ s_nop 0
+ s_nop 0
+ s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes
+ s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved
+ s_add_u32 s0, s0,s_save_alloc_size
+ s_addc_u32 s1, s1, 0
+ s_setpc_b64 s[0:1]
+
+
+ for var i =0; i< 128; i++
+ // be careful to make here a 64Byte aligned address, which could improve performance...
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
+
+ if i!=127
+ s_sub_u32 m0, m0, s3 // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e. pack more LDS_DMA inst to one Cacheline
+ s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3
+ end
+ end
+
+else // BUFFER_STORE
+ v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
+ v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid
+ v_mul_i32_i24 v2, v3, 8 // tid*8
+ v_mov_b32 v3, 256*2
+ s_mov_b32 m0, 0x10000
+ s_mov_b32 s0, s_save_buf_rsrc3
+ s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF // disable add_tid
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000 //DFMT
+
+L_SAVE_LDS_LOOP_VECTOR:
+ ds_read_b64 v[0:1], v2 //x =LDS[a], byte address
+ s_waitcnt lgkmcnt(0)
+ buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1
+// s_waitcnt vmcnt(0)
+// v_add_u32 v2, vcc[0:1], v2, v3
+ v_add_u32 v2, v2, v3
+ v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+ s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
+
+ // restore rsrc3
+ s_mov_b32 s_save_buf_rsrc3, s0
+
+end
+
+L_SAVE_LDS_DONE:
+
+
+ /* save VGPRs - set the Rest VGPRs */
+ //////////////////////////////////////////////////////////////////////////////////////
+ L_SAVE_VGPR:
+ // VGPR SR memory offset: 0
+ // TODO rearrange the RSRC words to use swizzle for VGPR save...
+
+ s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ s_mov_b32 m0, 4 // skip first 4 VGPRs
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs
+
+ s_set_gpr_idx_on m0, 0x1 // This will change M0
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0
+L_SAVE_VGPR_LOOP:
+ v_mov_b32 v0, v0 // v0 = v[0+m0]
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+
+
+ buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ s_add_u32 m0, m0, 4
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
+ s_set_gpr_idx_off
+L_SAVE_VGPR_LOOP_END:
+
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
+else
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value =0
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_END
+
+
+ s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later
+
+ L_SAVE_VGPR_LOOP:
+ v_mov_b32 v0, v0 //v0 = v[0+m0]
+ v_mov_b32 v1, v1 //v0 = v[0+m0]
+ v_mov_b32 v2, v2 //v0 = v[0+m0]
+ v_mov_b32 v3, v3 //v0 = v[0+m0]
+
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+ end
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
+ s_set_gpr_idx_off
+end
+
+L_SAVE_VGPR_END:
+
+
+
+
+
+
+ /* S_PGM_END_SAVED */ //FIXME graphics ONLY
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
+ s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
+ s_rfe_b64 s_save_pc_lo //Return to the main shader program
+ else
+ end
+
+// Save Done timestamp
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_save_d
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+ // Need reset rsrc2??
+ s_mov_b32 m0, s_save_mem_offset
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1
+end
+
+
+ s_branch L_END_PGM
+
+
+
+/**************************************************************************/
+/* restore routine */
+/**************************************************************************/
+
+L_RESTORE:
+ /* Setup Resource Contants */
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+ //calculate wd_addr using absolute thread id
+ v_readlane_b32 s_restore_tmp, v9, 0
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, 6
+ s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE
+ s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
+ else
+ end
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_restore_s
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+ // tma_lo/hi are sgpr 110, 111, which will not used for 112 SGPR allocated case...
+ s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0]
+ s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //backup ts to ttmp0/1, sicne exec will be finally restored..
+end
+
+
+
+ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
+ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
+
+ /* global mem offset */
+// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0
+
+ /* the first wave in the threadgroup */
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_RESTORE_VGPR
+
+ /* restore LDS */
+ //////////////////////////////
+ L_RESTORE_LDS:
+
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes
+ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow???
+
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+ s_mov_b32 m0, 0x0 //lds_offset initial value = 0
+
+ L_RESTORE_LDS_LOOP:
+ if (SAVE_LDS)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW
+ end
+ s_add_u32 m0, m0, 256*2 // 128 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete?
+
+
+ /* restore VGPRs */
+ //////////////////////////////
+ L_RESTORE_VGPR:
+ // VGPR SR memory offset : 0
+ s_mov_b32 s_restore_mem_offset, 0x0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+if G8SR_VGPR_SR_IN_DWX4
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ s_mov_b32 m0, s_restore_alloc_size
+ s_set_gpr_idx_on m0, 0x8 // Note.. This will change m0
+
+L_RESTORE_VGPR_LOOP:
+ buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+ s_waitcnt vmcnt(0)
+ s_sub_u32 m0, m0, 4
+ v_mov_b32 v0, v0 // v[0+m0] = v0
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+ s_cmp_eq_u32 m0, 0x8000
+ s_cbranch_scc0 L_RESTORE_VGPR_LOOP
+ s_set_gpr_idx_off
+
+ s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes
+
+else
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 1
+ s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later
+
+ L_RESTORE_VGPR_LOOP:
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
+ end
+ s_waitcnt vmcnt(0) //ensure data ready
+ v_mov_b32 v0, v0 //v[0+m0] = v0
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete?
+ s_set_gpr_idx_off
+ /* VGPR restore on v0 */
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
+ end
+
+end
+
+ /* restore SGPRs */
+ //////////////////////////////
+
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4 // restore SGPR from S[n] to S[0], by 16 sgprs group
+ // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
+
+ if (SGPR_SAVE_USE_SQC)
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes
+ else
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
+ end
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ s_mov_b32 m0, s_restore_alloc_size
+
+ L_RESTORE_SGPR_LOOP:
+ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) //PV: further performance improvement can be made
+ s_waitcnt lgkmcnt(0) //ensure data ready
+
+ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+ s_movreld_b64 s8, s8
+ s_movreld_b64 s10, s10
+ s_movreld_b64 s12, s12
+ s_movreld_b64 s14, s14
+
+ s_cmp_eq_u32 m0, 0 //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc0 L_RESTORE_SGPR_LOOP //SGPR restore (except s0) is complete?
+
+ /* restore HW registers */
+ //////////////////////////////
+ L_RESTORE_HWREG:
+
+
+if G8SR_DEBUG_TIMESTAMP
+ s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo
+ s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi
+end
+
+ // HWREG SR memory offset : size(VGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0
+ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC
+ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //EXEC
+ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset) //STATUS
+ read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset) //TRAPSTS
+ read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO
+ read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI
+ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE
+
+ s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
+
+ //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+ s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
+ s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
+ end
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL))
+ s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal
+ s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
+ end
+
+ s_mov_b32 m0, s_restore_m0
+ s_mov_b32 exec_lo, s_restore_exec_lo
+ s_mov_b32 exec_hi, s_restore_exec_hi
+
+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
+ s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+ //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
+ s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
+
+ // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_restore_ttmps_lo)
+ get_sgpr_size_bytes(s_restore_ttmps_hi)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+ s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1
+ s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
+ s_waitcnt lgkmcnt(0)
+
+ //reuse s_restore_m0 as a temp register
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
+ s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
+ s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+ s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
+ s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
+
+ s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_restore_d
+ s_waitcnt lgkmcnt(0)
+end
+
+// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+ s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
+
+
+/**************************************************************************/
+/* the END */
+/**************************************************************************/
+L_END_PGM:
+ s_endpgm
+
+end
+
+
+/**************************************************************************/
+/* the helper functions */
+/**************************************************************************/
+
+//Only for save hwreg to mem
+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+ s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on
+ s_mov_b32 m0, s_mem_offset
+ s_buffer_store_dword s, s_rsrc, m0 glc:1
+ ack_sqc_store_workaround()
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+ s_mov_b32 m0, exec_lo
+end
+
+
+// HWREG are saved before SGPRs, so all HWREG could be use.
+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+
+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
+ ack_sqc_store_workaround()
+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
+ ack_sqc_store_workaround()
+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
+ ack_sqc_store_workaround()
+ s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+ ack_sqc_store_workaround()
+ s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
+ s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc
+end
+
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*16
+end
+
+
+
+function get_lds_size_bytes(s_lds_size_byte)
+ // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW
+ s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) // lds_size
+ s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte)
+ s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size
+ s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible
+end
+
+function get_sgpr_size_bytes(s_sgpr_size_byte)
+ s_getreg_b32 s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size
+ s_add_u32 s_sgpr_size_byte, s_sgpr_size_byte, 1
+ s_lshl_b32 s_sgpr_size_byte, s_sgpr_size_byte, 6 //Number of SGPRs = (sgpr_size + 1) * 16 *4 (non-zero value)
+end
+
+function get_hwreg_size_bytes
+ return 128 //HWREG size 128 bytes
+end
+
+function ack_sqc_store_workaround
+ if ACK_SQC_STORE
+ s_waitcnt lgkmcnt(0)
+ end
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 59808a39ecf4..f64c5551cdba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -233,7 +233,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
pr_debug("Queue Size: 0x%llX, %u\n",
q_properties->queue_size, args->ring_size);
- pr_debug("Queue r/w Pointers: %p, %p\n",
+ pr_debug("Queue r/w Pointers: %px, %px\n",
q_properties->read_ptr,
q_properties->write_ptr);
@@ -292,8 +292,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
- args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
+ args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
+ args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
args->doorbell_offset <<= PAGE_SHIFT;
+ if (KFD_IS_SOC15(dev->device_info->asic_family))
+ /* On SOC15 ASICs, doorbell allocation must be
+ * per-device, and independent from the per-process
+ * queue_id. Return the doorbell offset within the
+ * doorbell aperture to user mode.
+ */
+ args->doorbell_offset |= q_properties.doorbell_off;
mutex_unlock(&p->mutex);
@@ -1296,8 +1304,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
return -EINVAL;
}
- devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
- GFP_KERNEL);
+ devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
+ GFP_KERNEL);
if (!devices_arr)
return -ENOMEM;
@@ -1405,8 +1413,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
return -EINVAL;
}
- devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
- GFP_KERNEL);
+ devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
+ GFP_KERNEL);
if (!devices_arr)
return -ENOMEM;
@@ -1645,23 +1653,33 @@ err_i1:
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
+ struct kfd_dev *dev = NULL;
+ unsigned long vm_pgoff;
+ unsigned int gpu_id;
process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
- if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
- KFD_MMAP_DOORBELL_MASK) {
- vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
- return kfd_doorbell_mmap(process, vma);
- } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
- KFD_MMAP_EVENTS_MASK) {
- vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
+ vm_pgoff = vma->vm_pgoff;
+ vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
+ gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
+ if (gpu_id)
+ dev = kfd_device_by_id(gpu_id);
+
+ switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+ case KFD_MMAP_TYPE_DOORBELL:
+ if (!dev)
+ return -ENODEV;
+ return kfd_doorbell_mmap(dev, process, vma);
+
+ case KFD_MMAP_TYPE_EVENTS:
return kfd_event_mmap(process, vma);
- } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
- KFD_MMAP_RESERVED_MEM_MASK) {
- vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
- return kfd_reserved_mem_mmap(process, vma);
+
+ case KFD_MMAP_TYPE_RESERVED_MEM:
+ if (!dev)
+ return -ENODEV;
+ return kfd_reserved_mem_mmap(dev, process, vma);
}
return -EFAULT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 4f126ef6139b..296b3f230280 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -132,6 +132,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define fiji_cache_info carrizo_cache_info
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
+/* TODO - check & update Vega10 cache details */
+#define vega10_cache_info carrizo_cache_info
+#define raven_cache_info carrizo_cache_info
static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
struct crat_subtype_computeunit *cu)
@@ -603,6 +606,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info = polaris11_cache_info;
num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
break;
+ case CHIP_VEGA10:
+ pcache_info = vega10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ break;
+ case CHIP_RAVEN:
+ pcache_info = raven_cache_info;
+ num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+ break;
default:
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3346699960dd..7ee6cec2c060 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -20,16 +20,13 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
-#include <linux/amd-iommu.h>
-#endif
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
-#include "cwsr_trap_handler_gfx8.asm"
+#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#define MQD_SIZE_ALIGNED 768
@@ -41,6 +38,7 @@ static const struct kfd_device_info kaveri_device_info = {
.max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -55,6 +53,7 @@ static const struct kfd_device_info carrizo_device_info = {
.max_pasid_bits = 16,
/* max num of queues for CZ.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -70,6 +69,7 @@ static const struct kfd_device_info hawaii_device_info = {
.max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -83,6 +83,7 @@ static const struct kfd_device_info tonga_device_info = {
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -96,6 +97,7 @@ static const struct kfd_device_info tonga_vf_device_info = {
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -109,6 +111,7 @@ static const struct kfd_device_info fiji_device_info = {
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -122,6 +125,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -136,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = {
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -149,6 +154,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -162,6 +168,7 @@ static const struct kfd_device_info polaris11_device_info = {
.asic_family = CHIP_POLARIS11,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -171,6 +178,34 @@ static const struct kfd_device_info polaris11_device_info = {
.needs_pci_atomics = true,
};
+static const struct kfd_device_info vega10_device_info = {
+ .asic_family = CHIP_VEGA10,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+};
+
+static const struct kfd_device_info vega10_vf_device_info = {
+ .asic_family = CHIP_VEGA10,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+};
+
struct kfd_deviceid {
unsigned short did;
@@ -250,6 +285,15 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x67EB, &polaris11_device_info }, /* Polaris11 */
{ 0x67EF, &polaris11_device_info }, /* Polaris11 */
{ 0x67FF, &polaris11_device_info }, /* Polaris11 */
+ { 0x6860, &vega10_device_info }, /* Vega10 */
+ { 0x6861, &vega10_device_info }, /* Vega10 */
+ { 0x6862, &vega10_device_info }, /* Vega10 */
+ { 0x6863, &vega10_device_info }, /* Vega10 */
+ { 0x6864, &vega10_device_info }, /* Vega10 */
+ { 0x6867, &vega10_device_info }, /* Vega10 */
+ { 0x6868, &vega10_device_info }, /* Vega10 */
+ { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/
+ { 0x687F, &vega10_device_info }, /* Vega10 */
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -279,7 +323,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
{
struct kfd_dev *kfd;
-
+ int ret;
const struct kfd_device_info *device_info =
lookup_device_info(pdev->device);
@@ -288,19 +332,18 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
return NULL;
}
- if (device_info->needs_pci_atomics) {
- /* Allow BIF to recode atomics to PCIe 3.0
- * AtomicOps. 32 and 64-bit requests are possible and
- * must be supported.
- */
- if (pci_enable_atomic_ops_to_root(pdev,
- PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
- dev_info(kfd_device,
- "skipped device %x:%x, PCI rejects atomics",
- pdev->vendor, pdev->device);
- return NULL;
- }
+ /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
+ * 32 and 64-bit requests are possible and must be
+ * supported.
+ */
+ ret = pci_enable_atomic_ops_to_root(pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ if (device_info->needs_pci_atomics && ret < 0) {
+ dev_info(kfd_device,
+ "skipped device %x:%x, PCI rejects atomics\n",
+ pdev->vendor, pdev->device);
+ return NULL;
}
kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
@@ -323,10 +366,16 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
if (cwsr_enable && kfd->device_info->supports_cwsr) {
- BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+ if (kfd->device_info->asic_family < CHIP_VEGA10) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx8_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
+ } else {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx9_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
+ }
- kfd->cwsr_isa = cwsr_trap_gfx8_hex;
- kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
kfd->cwsr_enabled = true;
}
}
@@ -541,6 +590,44 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_unlock(&kfd->interrupt_lock);
}
+int kgd2kfd_quiesce_mm(struct mm_struct *mm)
+{
+ struct kfd_process *p;
+ int r;
+
+ /* Because we are called from arbitrary context (workqueue) as opposed
+ * to process context, kfd_process could attempt to exit while we are
+ * running so the lookup function increments the process ref count.
+ */
+ p = kfd_lookup_process_by_mm(mm);
+ if (!p)
+ return -ESRCH;
+
+ r = kfd_process_evict_queues(p);
+
+ kfd_unref_process(p);
+ return r;
+}
+
+int kgd2kfd_resume_mm(struct mm_struct *mm)
+{
+ struct kfd_process *p;
+ int r;
+
+ /* Because we are called from arbitrary context (workqueue) as opposed
+ * to process context, kfd_process could attempt to exit while we are
+ * running so the lookup function increments the process ref count.
+ */
+ p = kfd_lookup_process_by_mm(mm);
+ if (!p)
+ return -ESRCH;
+
+ r = kfd_process_restore_queues(p);
+
+ kfd_unref_process(p);
+ return r;
+}
+
/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
* prepare for safe eviction of KFD BOs that belong to the specified
* process.
@@ -652,7 +739,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
return -ENOMEM;
- *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+ *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
if ((*mem_obj) == NULL)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d55d29d31da4..668ad07ebe1f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -110,6 +110,57 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
qpd->sh_mem_bases);
}
+static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
+{
+ struct kfd_dev *dev = qpd->dqm->dev;
+
+ if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
+ /* On pre-SOC15 chips we need to use the queue ID to
+ * preserve the user mode ABI.
+ */
+ q->doorbell_id = q->properties.queue_id;
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ /* For SDMA queues on SOC15, use static doorbell
+ * assignments based on the engine and queue.
+ */
+ q->doorbell_id = dev->shared_resources.sdma_doorbell
+ [q->properties.sdma_engine_id]
+ [q->properties.sdma_queue_id];
+ } else {
+ /* For CP queues on SOC15 reserve a free doorbell ID */
+ unsigned int found;
+
+ found = find_first_zero_bit(qpd->doorbell_bitmap,
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+ pr_debug("No doorbells available");
+ return -EBUSY;
+ }
+ set_bit(found, qpd->doorbell_bitmap);
+ q->doorbell_id = found;
+ }
+
+ q->properties.doorbell_off =
+ kfd_doorbell_id_to_offset(dev, q->process,
+ q->doorbell_id);
+
+ return 0;
+}
+
+static void deallocate_doorbell(struct qcm_process_device *qpd,
+ struct queue *q)
+{
+ unsigned int old;
+ struct kfd_dev *dev = qpd->dqm->dev;
+
+ if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ return;
+
+ old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
+ WARN_ON(!old);
+}
+
static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
@@ -145,15 +196,19 @@ static int allocate_vmid(struct device_queue_manager *dqm,
static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
struct qcm_process_device *qpd)
{
- uint32_t len;
+ const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
+ int ret;
if (!qpd->ib_kaddr)
return -ENOMEM;
- len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+ ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+ if (ret)
+ return ret;
return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
- qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
+ qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
+ pmf->release_mem_size / sizeof(uint32_t));
}
static void deallocate_vmid(struct device_queue_manager *dqm,
@@ -301,10 +356,14 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
if (retval)
return retval;
+ retval = allocate_doorbell(qpd, q);
+ if (retval)
+ goto out_deallocate_hqd;
+
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
- goto out_deallocate_hqd;
+ goto out_deallocate_doorbell;
pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
q->pipe, q->queue);
@@ -324,6 +383,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
out_uninit_mqd:
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_doorbell:
+ deallocate_doorbell(qpd, q);
out_deallocate_hqd:
deallocate_hqd(dqm, q);
@@ -357,6 +418,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
}
dqm->total_queue_count--;
+ deallocate_doorbell(qpd, q);
+
retval = mqd->destroy_mqd(mqd, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
KFD_UNMAP_LATENCY_MS,
@@ -861,6 +924,10 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
+ retval = allocate_doorbell(qpd, q);
+ if (retval)
+ goto out_deallocate_sdma_queue;
+
pr_debug("SDMA id is: %d\n", q->sdma_id);
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
@@ -869,7 +936,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
- goto out_deallocate_sdma_queue;
+ goto out_deallocate_doorbell;
retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
if (retval)
@@ -879,6 +946,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
out_uninit_mqd:
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_doorbell:
+ deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1070,12 +1139,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
q->properties.sdma_engine_id =
q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
}
+
+ retval = allocate_doorbell(qpd, q);
+ if (retval)
+ goto out_deallocate_sdma_queue;
+
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) {
retval = -ENOMEM;
- goto out_deallocate_sdma_queue;
+ goto out_deallocate_doorbell;
}
/*
* Eviction state logic: we only mark active queues as evicted
@@ -1093,7 +1167,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
- goto out_deallocate_sdma_queue;
+ goto out_deallocate_doorbell;
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
@@ -1117,6 +1191,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
mutex_unlock(&dqm->lock);
return retval;
+out_deallocate_doorbell:
+ deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1257,6 +1333,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
goto failed;
}
+ deallocate_doorbell(qpd, q);
+
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1308,7 +1386,10 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
- bool retval;
+ bool retval = true;
+
+ if (!dqm->asic_ops.set_cache_memory_policy)
+ return retval;
mutex_lock(&dqm->lock);
@@ -1577,6 +1658,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_POLARIS11:
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
break;
+
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ device_queue_manager_init_v9(&dqm->asic_ops);
+ break;
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
@@ -1627,6 +1713,18 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
int pipe, queue;
int r = 0;
+ r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
+ KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
+ if (!r) {
+ seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
+ KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
+ KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
+ KFD_CIK_HIQ_QUEUE);
+ seq_reg_dump(m, dump, n_regs);
+
+ kfree(dump);
+ }
+
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
int pipe_offset = pipe * get_queues_per_pipe(dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 412beff3281d..59a6b1956932 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -200,6 +200,8 @@ void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi_tonga(
struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v9(
+ struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
unsigned int get_queues_num(struct device_queue_manager *dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
new file mode 100644
index 000000000000..79e5bcf6367c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "vega10_enum.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+
+static int update_qpd_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd);
+
+void device_queue_manager_init_v9(
+ struct device_queue_manager_asic_ops *asic_ops)
+{
+ asic_ops->update_qpd = update_qpd_v9;
+ asic_ops->init_sdma_vm = init_sdma_vm_v9;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+ uint32_t shared_base = pdd->lds_base >> 48;
+ uint32_t private_base = pdd->scratch_base >> 48;
+
+ return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ private_base;
+}
+
+static int update_qpd_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ struct kfd_process_device *pdd;
+
+ pdd = qpd_to_pdd(qpd);
+
+ /* check if sh_mem_config register already configured */
+ if (qpd->sh_mem_config == 0) {
+ qpd->sh_mem_config =
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+ if (vega10_noretry &&
+ !dqm->dev->device_info->needs_iommu_device)
+ qpd->sh_mem_config |=
+ 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ }
+
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+
+ pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+
+ return 0;
+}
+
+static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ /* Not needed on SDMAv4 any more */
+ q->properties.sdma_vm_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index ebb4da14e3df..c3744d89352c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -33,7 +33,6 @@
static DEFINE_IDA(doorbell_ida);
static unsigned int max_doorbell_slices;
-#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
/*
* Each device exposes a doorbell aperture, a PCI MMIO aperture that
@@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices;
*/
/* # of doorbell bytes allocated for each process. */
-static inline size_t doorbell_process_allocation(void)
+size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
- return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES *
+ return roundup(kfd->device_info->doorbell_size *
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
PAGE_SIZE);
}
@@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
doorbell_start_offset =
roundup(kfd->shared_resources.doorbell_start_offset,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
doorbell_aperture_size =
rounddown(kfd->shared_resources.doorbell_aperture_size,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
if (doorbell_aperture_size > doorbell_start_offset)
doorbell_process_limit =
(doorbell_aperture_size - doorbell_start_offset) /
- doorbell_process_allocation();
+ kfd_doorbell_process_slice(kfd);
else
return -ENOSPC;
@@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
if (!kfd->doorbell_kernel_ptr)
return -ENOMEM;
@@ -127,21 +126,16 @@ void kfd_doorbell_fini(struct kfd_dev *kfd)
iounmap(kfd->doorbell_kernel_ptr);
}
-int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
+int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
+ struct vm_area_struct *vma)
{
phys_addr_t address;
- struct kfd_dev *dev;
/*
* For simplicitly we only allow mapping of the entire doorbell
* allocation of a single device & process.
*/
- if (vma->vm_end - vma->vm_start != doorbell_process_allocation())
- return -EINVAL;
-
- /* Find kfd device according to gpu id */
- dev = kfd_device_by_id(vma->vm_pgoff);
- if (!dev)
+ if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
return -EINVAL;
/* Calculate physical address of doorbell */
@@ -158,19 +152,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
(unsigned long long) vma->vm_start, address, vma->vm_flags,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(dev));
return io_remap_pfn_range(vma,
vma->vm_start,
address >> PAGE_SHIFT,
- doorbell_process_allocation(),
+ kfd_doorbell_process_slice(dev),
vma->vm_page_prot);
}
/* get kernel iomem pointer for a doorbell */
-u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off)
{
u32 inx;
@@ -185,6 +179,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
+ inx *= kfd->device_info->doorbell_size / sizeof(u32);
+
/*
* Calculating the kernel doorbell offset using the first
* doorbell page.
@@ -210,7 +206,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
mutex_unlock(&kfd->doorbell_mutex);
}
-inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
+void write_kernel_doorbell(void __iomem *db, u32 value)
{
if (db) {
writel(value, db);
@@ -218,30 +214,37 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
}
}
-/*
- * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1
- * to doorbells with the process's doorbell page
- */
-unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
+void write_kernel_doorbell64(void __iomem *db, u64 value)
+{
+ if (db) {
+ WARN(((unsigned long)db & 7) != 0,
+ "Unaligned 64-bit doorbell");
+ writeq(value, (u64 __iomem *)db);
+ pr_debug("writing %llu to doorbell address %p\n", value, db);
+ }
+}
+
+unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
struct kfd_process *process,
- unsigned int queue_id)
+ unsigned int doorbell_id)
{
/*
* doorbell_id_offset accounts for doorbells taken by KGD.
- * index * doorbell_process_allocation/sizeof(u32) adjusts to
- * the process's doorbells.
+ * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
+ * the process's doorbells. The offset returned is in dword
+ * units regardless of the ASIC-dependent doorbell size.
*/
return kfd->doorbell_id_offset +
process->doorbell_index
- * doorbell_process_allocation() / sizeof(u32) +
- queue_id;
+ * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
+ doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
}
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
{
uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
kfd->shared_resources.doorbell_start_offset) /
- doorbell_process_allocation() + 1;
+ kfd_doorbell_process_slice(kfd) + 1;
return num_of_elems;
@@ -251,7 +254,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process)
{
return dev->doorbell_base +
- process->doorbell_index * doorbell_process_allocation();
+ process->doorbell_index * kfd_doorbell_process_slice(dev);
}
int kfd_alloc_process_doorbells(struct kfd_process *process)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 4890a90f1e44..5562e94e786a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
case KFD_EVENT_TYPE_DEBUG:
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
- *event_page_offset = KFD_MMAP_EVENTS_MASK;
+ *event_page_offset = KFD_MMAP_TYPE_EVENTS;
*event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->event_id;
}
@@ -496,7 +496,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
partial_id, valid_id_bits);
- if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
+ if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) {
/* With relatively few events, it's faster to
* iterate over the event IDR
*/
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 66852de410c8..97d5423c5673 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -275,23 +275,35 @@
* for FLAT_* / S_LOAD operations.
*/
-#define MAKE_GPUVM_APP_BASE(gpu_num) \
+#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \
(((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
#define MAKE_GPUVM_APP_LIMIT(base, size) \
(((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
-#define MAKE_SCRATCH_APP_BASE() \
+#define MAKE_SCRATCH_APP_BASE_VI() \
(((uint64_t)(0x1UL) << 61) + 0x100000000L)
#define MAKE_SCRATCH_APP_LIMIT(base) \
(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
-#define MAKE_LDS_APP_BASE() \
+#define MAKE_LDS_APP_BASE_VI() \
(((uint64_t)(0x1UL) << 61) + 0x0)
#define MAKE_LDS_APP_LIMIT(base) \
(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
+/* On GFXv9 the LDS and scratch apertures are programmed independently
+ * using the high 16 bits of the 64-bit virtual address. They must be
+ * in the hole, which will be the case as long as the high 16 bits are
+ * not 0.
+ *
+ * The aperture sizes are still 4GB implicitly.
+ *
+ * A GPUVM aperture is not applicable on GFXv9.
+ */
+#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
+#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
+
/* User mode manages most of the SVM aperture address space. The low
* 16MB are reserved for kernel use (CWSR trap handler and kernel IB
* for now).
@@ -300,6 +312,55 @@
#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
#define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE)
+static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
+{
+ /*
+ * node id couldn't be 0 - the three MSB bits of
+ * aperture shoudn't be 0
+ */
+ pdd->lds_base = MAKE_LDS_APP_BASE_VI();
+ pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+
+ if (!pdd->dev->device_info->needs_iommu_device) {
+ /* dGPUs: SVM aperture starting at 0
+ * with small reserved space for kernel.
+ * Set them to CANONICAL addresses.
+ */
+ pdd->gpuvm_base = SVM_USER_BASE;
+ pdd->gpuvm_limit =
+ pdd->dev->shared_resources.gpuvm_size - 1;
+ } else {
+ /* set them to non CANONICAL addresses, and no SVM is
+ * allocated.
+ */
+ pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1);
+ pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base,
+ pdd->dev->shared_resources.gpuvm_size);
+ }
+
+ pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
+ pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+}
+
+static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
+{
+ pdd->lds_base = MAKE_LDS_APP_BASE_V9();
+ pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+
+ /* Raven needs SVM to support graphic handle, etc. Leave the small
+ * reserved space before SVM on Raven as well, even though we don't
+ * have to.
+ * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
+ * are used in Thunk to reserve SVM.
+ */
+ pdd->gpuvm_base = SVM_USER_BASE;
+ pdd->gpuvm_limit =
+ pdd->dev->shared_resources.gpuvm_size - 1;
+
+ pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
+ pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+}
+
int kfd_init_apertures(struct kfd_process *process)
{
uint8_t id = 0;
@@ -307,9 +368,7 @@ int kfd_init_apertures(struct kfd_process *process)
struct kfd_process_device *pdd;
/*Iterating over all devices*/
- while (kfd_topology_enum_kfd_devices(id, &dev) == 0 &&
- id < NUM_OF_SUPPORTED_GPUS) {
-
+ while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
if (!dev) {
id++; /* Skip non GPU devices */
continue;
@@ -318,7 +377,7 @@ int kfd_init_apertures(struct kfd_process *process)
pdd = kfd_create_process_device_data(dev, process);
if (!pdd) {
pr_err("Failed to create process device data\n");
- return -1;
+ return -ENOMEM;
}
/*
* For 64 bit process apertures will be statically reserved in
@@ -330,32 +389,30 @@ int kfd_init_apertures(struct kfd_process *process)
pdd->gpuvm_base = pdd->gpuvm_limit = 0;
pdd->scratch_base = pdd->scratch_limit = 0;
} else {
- /* Same LDS and scratch apertures can be used
- * on all GPUs. This allows using more dGPUs
- * than placement options for apertures.
- */
- pdd->lds_base = MAKE_LDS_APP_BASE();
- pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
-
- pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
- pdd->scratch_limit =
- MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+ switch (dev->device_info->asic_family) {
+ case CHIP_KAVERI:
+ case CHIP_HAWAII:
+ case CHIP_CARRIZO:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ kfd_init_apertures_vi(pdd, id);
+ break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ kfd_init_apertures_v9(pdd, id);
+ break;
+ default:
+ WARN(1, "Unexpected ASIC family %u",
+ dev->device_info->asic_family);
+ return -EINVAL;
+ }
- if (dev->device_info->needs_iommu_device) {
- /* APUs: GPUVM aperture in
- * non-canonical address space
- */
- pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
- pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(
- pdd->gpuvm_base,
- dev->shared_resources.gpuvm_size);
- } else {
- /* dGPUs: SVM aperture starting at 0
- * with small reserved space for kernel
+ if (!dev->device_info->needs_iommu_device) {
+ /* dGPUs: the reserved space for kernel
+ * before SVM
*/
- pdd->gpuvm_base = SVM_USER_BASE;
- pdd->gpuvm_limit =
- dev->shared_resources.gpuvm_size - 1;
pdd->qpd.cwsr_base = SVM_CWSR_BASE;
pdd->qpd.ib_base = SVM_IB_BASE;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
new file mode 100644
index 000000000000..37029baa3346
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_priv.h"
+#include "kfd_events.h"
+#include "soc15_int.h"
+
+
+static bool event_interrupt_isr_v9(struct kfd_dev *dev,
+ const uint32_t *ih_ring_entry)
+{
+ uint16_t source_id, client_id, pasid, vmid;
+ const uint32_t *data = ih_ring_entry;
+
+ /* Only handle interrupts from KFD VMIDs */
+ vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+ if (vmid < dev->vm_info.first_vmid_kfd ||
+ vmid > dev->vm_info.last_vmid_kfd)
+ return 0;
+
+ /* If there is no valid PASID, it's likely a firmware bug */
+ pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+ if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
+ return 0;
+
+ source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+ client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+
+ pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, pasid);
+ pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3],
+ data[4], data[5], data[6], data[7]);
+
+ /* Interrupt types we care about: various signals and faults.
+ * They will be forwarded to a work queue (see below).
+ */
+ return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+ source_id == SOC15_INTSRC_SDMA_TRAP ||
+ source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+ source_id == SOC15_INTSRC_CP_BAD_OPCODE;
+}
+
+static void event_interrupt_wq_v9(struct kfd_dev *dev,
+ const uint32_t *ih_ring_entry)
+{
+ uint16_t source_id, client_id, pasid, vmid;
+ uint32_t context_id;
+
+ source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+ client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+ pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+ vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+ context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
+ kfd_signal_event_interrupt(pasid, context_id, 32);
+ else if (source_id == SOC15_INTSRC_SDMA_TRAP)
+ kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28);
+ else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG)
+ kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24);
+ else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
+ kfd_signal_hw_exception_event(pasid);
+ else if (client_id == SOC15_IH_CLIENTID_VMC ||
+ client_id == SOC15_IH_CLIENTID_UTCL2) {
+ /* TODO */
+ }
+}
+
+const struct kfd_event_interrupt_class event_interrupt_class_v9 = {
+ .interrupt_isr = event_interrupt_isr_v9,
+ .interrupt_wq = event_interrupt_wq_v9,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 035c351f47c5..db6d9336b80d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -139,10 +139,12 @@ static void interrupt_wq(struct work_struct *work)
{
struct kfd_dev *dev = container_of(work, struct kfd_dev,
interrupt_work);
+ uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
- uint32_t ih_ring_entry[DIV_ROUND_UP(
- dev->device_info->ih_ring_entry_size,
- sizeof(uint32_t))];
+ if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
+ dev_err_once(kfd_chardev(), "Ring entry too small\n");
+ return;
+ }
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
dev->device_info->event_interrupt_class->interrupt_wq(dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 69f496485331..476951d8c91c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
- retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
+ retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
&kq->wptr_mem);
if (retval != 0)
@@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
size_t available_size;
size_t queue_size_dwords;
uint32_t wptr, rptr;
+ uint64_t wptr64;
unsigned int *queue_address;
/* When rptr == wptr, the buffer is empty.
@@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
* the opposite. So we can only use up to queue_size_dwords - 1 dwords.
*/
rptr = *kq->rptr_kernel;
- wptr = *kq->wptr_kernel;
+ wptr = kq->pending_wptr;
+ wptr64 = kq->pending_wptr64;
queue_address = (unsigned int *)kq->pq_kernel_addr;
queue_size_dwords = kq->queue->properties.queue_size / 4;
@@ -232,29 +234,33 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
* make sure calling functions know
* acquire_packet_buffer() failed
*/
- *buffer_ptr = NULL;
- return -ENOMEM;
+ goto err_no_space;
}
if (wptr + packet_size_in_dwords >= queue_size_dwords) {
/* make sure after rolling back to position 0, there is
* still enough space.
*/
- if (packet_size_in_dwords >= rptr) {
- *buffer_ptr = NULL;
- return -ENOMEM;
- }
+ if (packet_size_in_dwords >= rptr)
+ goto err_no_space;
+
/* fill nops, roll back and start at position 0 */
while (wptr > 0) {
queue_address[wptr] = kq->nop_packet;
wptr = (wptr + 1) % queue_size_dwords;
+ wptr64++;
}
}
*buffer_ptr = &queue_address[wptr];
kq->pending_wptr = wptr + packet_size_in_dwords;
+ kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
return 0;
+
+err_no_space:
+ *buffer_ptr = NULL;
+ return -ENOMEM;
}
static void submit_packet(struct kernel_queue *kq)
@@ -270,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq)
pr_debug("\n");
#endif
- *kq->wptr_kernel = kq->pending_wptr;
- write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
- kq->pending_wptr);
+ kq->ops_asic_specific.submit_packet(kq);
}
static void rollback_packet(struct kernel_queue *kq)
{
- kq->pending_wptr = *kq->queue->properties.write_ptr;
+ if (kq->dev->device_info->doorbell_size == 8) {
+ kq->pending_wptr64 = *kq->wptr64_kernel;
+ kq->pending_wptr = *kq->wptr_kernel %
+ (kq->queue->properties.queue_size / 4);
+ } else {
+ kq->pending_wptr = *kq->wptr_kernel;
+ }
}
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -308,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
case CHIP_HAWAII:
kernel_queue_init_cik(&kq->ops_asic_specific);
break;
+
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ kernel_queue_init_v9(&kq->ops_asic_specific);
+ break;
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 594053136ee4..97aff2041a5d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -72,6 +72,7 @@ struct kernel_queue {
struct kfd_dev *dev;
struct mqd_manager *mqd;
struct queue *queue;
+ uint64_t pending_wptr64;
uint32_t pending_wptr;
unsigned int nop_packet;
@@ -79,7 +80,10 @@ struct kernel_queue {
uint32_t *rptr_kernel;
uint64_t rptr_gpu_addr;
struct kfd_mem_obj *wptr_mem;
- uint32_t *wptr_kernel;
+ union {
+ uint64_t *wptr64_kernel;
+ uint32_t *wptr_kernel;
+ };
uint64_t wptr_gpu_addr;
struct kfd_mem_obj *pq;
uint64_t pq_gpu_addr;
@@ -97,5 +101,6 @@ struct kernel_queue {
void kernel_queue_init_cik(struct kernel_queue_ops *ops);
void kernel_queue_init_vi(struct kernel_queue_ops *ops);
+void kernel_queue_init_v9(struct kernel_queue_ops *ops);
#endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
index a90eb440b1fb..19e54acb4125 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
@@ -26,11 +26,13 @@
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_cik(struct kernel_queue *kq);
+static void submit_packet_cik(struct kernel_queue *kq);
void kernel_queue_init_cik(struct kernel_queue_ops *ops)
{
ops->initialize = initialize_cik;
ops->uninitialize = uninitialize_cik;
+ ops->submit_packet = submit_packet_cik;
}
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
@@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
static void uninitialize_cik(struct kernel_queue *kq)
{
}
+
+static void submit_packet_cik(struct kernel_queue *kq)
+{
+ *kq->wptr_kernel = kq->pending_wptr;
+ write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+ kq->pending_wptr);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
new file mode 100644
index 000000000000..684a3bf07efd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_ai.h"
+#include "kfd_pm4_opcodes.h"
+
+static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size);
+static void uninitialize_v9(struct kernel_queue *kq);
+static void submit_packet_v9(struct kernel_queue *kq);
+
+void kernel_queue_init_v9(struct kernel_queue_ops *ops)
+{
+ ops->initialize = initialize_v9;
+ ops->uninitialize = uninitialize_v9;
+ ops->submit_packet = submit_packet_v9;
+}
+
+static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size)
+{
+ int retval;
+
+ retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
+ if (retval)
+ return false;
+
+ kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
+ kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
+
+ memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
+
+ return true;
+}
+
+static void uninitialize_v9(struct kernel_queue *kq)
+{
+ kfd_gtt_sa_free(kq->dev, kq->eop_mem);
+}
+
+static void submit_packet_v9(struct kernel_queue *kq)
+{
+ *kq->wptr64_kernel = kq->pending_wptr64;
+ write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
+ kq->pending_wptr64);
+}
+
+static int pm_map_process_v9(struct packet_manager *pm,
+ uint32_t *buffer, struct qcm_process_device *qpd)
+{
+ struct pm4_mes_map_process *packet;
+ uint64_t vm_page_table_base_addr =
+ (uint64_t)(qpd->page_table_base) << 12;
+
+ packet = (struct pm4_mes_map_process *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+ sizeof(struct pm4_mes_map_process));
+ packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+ packet->bitfields2.process_quantum = 1;
+ packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields14.gds_size = qpd->gds_size;
+ packet->bitfields14.num_gws = qpd->num_gws;
+ packet->bitfields14.num_oac = qpd->num_oac;
+ packet->bitfields14.sdma_enable = 1;
+ packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+ packet->sh_mem_config = qpd->sh_mem_config;
+ packet->sh_mem_bases = qpd->sh_mem_bases;
+ packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+ packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
+ packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+ packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+
+ packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+ packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+ packet->vm_context_page_table_base_addr_lo32 =
+ lower_32_bits(vm_page_table_base_addr);
+ packet->vm_context_page_table_base_addr_hi32 =
+ upper_32_bits(vm_page_table_base_addr);
+
+ return 0;
+}
+
+static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+ struct pm4_mes_runlist *packet;
+
+ int concurrent_proc_cnt = 0;
+ struct kfd_dev *kfd = pm->dqm->dev;
+
+ /* Determine the number of processes to map together to HW:
+ * it can not exceed the number of VMIDs available to the
+ * scheduler, and it is determined by the smaller of the number
+ * of processes in the runlist and kfd module parameter
+ * hws_max_conc_proc.
+ * Note: the arbitration between the number of VMIDs and
+ * hws_max_conc_proc has been done in
+ * kgd2kfd_device_init().
+ */
+ concurrent_proc_cnt = min(pm->dqm->processes_count,
+ kfd->max_proc_per_quantum);
+
+ packet = (struct pm4_mes_runlist *)buffer;
+
+ memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+ packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+ sizeof(struct pm4_mes_runlist));
+
+ packet->bitfields4.ib_size = ib_size_in_dwords;
+ packet->bitfields4.chain = chain ? 1 : 0;
+ packet->bitfields4.offload_polling = 0;
+ packet->bitfields4.valid = 1;
+ packet->bitfields4.process_cnt = concurrent_proc_cnt;
+ packet->ordinal2 = lower_32_bits(ib);
+ packet->ib_base_hi = upper_32_bits(ib);
+
+ return 0;
+}
+
+static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+ struct queue *q, bool is_static)
+{
+ struct pm4_mes_map_queues *packet;
+ bool use_static = is_static;
+
+ packet = (struct pm4_mes_map_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+ sizeof(struct pm4_mes_map_queues));
+ packet->bitfields2.alloc_format =
+ alloc_format__mes_map_queues__one_per_pipe_vi;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_map_queues__compute_vi;
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_compute_vi;
+
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ if (use_static)
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_latency_static_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__debug_interface_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+ engine_sel__mes_map_queues__sdma0_vi;
+ use_static = false; /* no static queues under SDMA */
+ break;
+ default:
+ WARN(1, "queue type %d", q->properties.type);
+ return -EINVAL;
+ }
+ packet->bitfields3.doorbell_offset =
+ q->properties.doorbell_off;
+
+ packet->mqd_addr_lo =
+ lower_32_bits(q->gart_mqd_addr);
+
+ packet->mqd_addr_hi =
+ upper_32_bits(q->gart_mqd_addr);
+
+ packet->wptr_addr_lo =
+ lower_32_bits((uint64_t)q->properties.write_ptr);
+
+ packet->wptr_addr_hi =
+ upper_32_bits((uint64_t)q->properties.write_ptr);
+
+ return 0;
+}
+
+static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_queue_type type,
+ enum kfd_unmap_queues_filter filter,
+ uint32_t filter_param, bool reset,
+ unsigned int sdma_engine)
+{
+ struct pm4_mes_unmap_queues *packet;
+
+ packet = (struct pm4_mes_unmap_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+ sizeof(struct pm4_mes_unmap_queues));
+ switch (type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__compute;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+ break;
+ default:
+ WARN(1, "queue type %d", type);
+ return -EINVAL;
+ }
+
+ if (reset)
+ packet->bitfields2.action =
+ action__mes_unmap_queues__reset_queues;
+ else
+ packet->bitfields2.action =
+ action__mes_unmap_queues__preempt_queues;
+
+ switch (filter) {
+ case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields3b.doorbell_offset0 = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+ packet->bitfields3a.pasid = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_queues;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+ /* in this case, we do not preempt static queues */
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+ break;
+ default:
+ WARN(1, "filter %d", filter);
+ return -EINVAL;
+ }
+
+ return 0;
+
+}
+
+static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t fence_address, uint32_t fence_value)
+{
+ struct pm4_mes_query_status *packet;
+
+ packet = (struct pm4_mes_query_status *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+
+ packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+ sizeof(struct pm4_mes_query_status));
+
+ packet->bitfields2.context_id = 0;
+ packet->bitfields2.interrupt_sel =
+ interrupt_sel__mes_query_status__completion_status;
+ packet->bitfields2.command =
+ command__mes_query_status__fence_only_after_write_ack;
+
+ packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+ packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+ packet->data_hi = upper_32_bits((uint64_t)fence_value);
+ packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+ return 0;
+}
+
+
+static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
+{
+ struct pm4_mec_release_mem *packet;
+
+ packet = (struct pm4_mec_release_mem *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
+
+ packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
+ sizeof(struct pm4_mec_release_mem));
+
+ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+ packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
+ packet->bitfields2.tcl1_action_ena = 1;
+ packet->bitfields2.tc_action_ena = 1;
+ packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
+
+ packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
+ packet->bitfields3.int_sel =
+ int_sel__mec_release_mem__send_interrupt_after_write_confirm;
+
+ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+ packet->address_hi = upper_32_bits(gpu_addr);
+
+ packet->data_lo = 0;
+
+ return 0;
+}
+
+const struct packet_manager_funcs kfd_v9_pm_funcs = {
+ .map_process = pm_map_process_v9,
+ .runlist = pm_runlist_v9,
+ .set_resources = pm_set_resources_vi,
+ .map_queues = pm_map_queues_v9,
+ .unmap_queues = pm_unmap_queues_v9,
+ .query_status = pm_query_status_v9,
+ .release_mem = pm_release_mem_v9,
+ .map_process_size = sizeof(struct pm4_mes_map_process),
+ .runlist_size = sizeof(struct pm4_mes_runlist),
+ .set_resources_size = sizeof(struct pm4_mes_set_resources),
+ .map_queues_size = sizeof(struct pm4_mes_map_queues),
+ .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .query_status_size = sizeof(struct pm4_mes_query_status),
+ .release_mem_size = sizeof(struct pm4_mec_release_mem)
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
index f1d48281e322..bf20c6d32ef3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -22,15 +22,20 @@
*/
#include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_vi.h"
+#include "kfd_pm4_opcodes.h"
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_vi(struct kernel_queue *kq);
+static void submit_packet_vi(struct kernel_queue *kq);
void kernel_queue_init_vi(struct kernel_queue_ops *ops)
{
ops->initialize = initialize_vi;
ops->uninitialize = uninitialize_vi;
+ ops->submit_packet = submit_packet_vi;
}
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
@@ -54,3 +59,317 @@ static void uninitialize_vi(struct kernel_queue *kq)
{
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
}
+
+static void submit_packet_vi(struct kernel_queue *kq)
+{
+ *kq->wptr_kernel = kq->pending_wptr;
+ write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+ kq->pending_wptr);
+}
+
+unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
+{
+ union PM4_MES_TYPE_3_HEADER header;
+
+ header.u32All = 0;
+ header.opcode = opcode;
+ header.count = packet_size / 4 - 2;
+ header.type = PM4_TYPE_3;
+
+ return header.u32All;
+}
+
+static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct qcm_process_device *qpd)
+{
+ struct pm4_mes_map_process *packet;
+
+ packet = (struct pm4_mes_map_process *)buffer;
+
+ memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+ sizeof(struct pm4_mes_map_process));
+ packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+ packet->bitfields2.process_quantum = 1;
+ packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields3.page_table_base = qpd->page_table_base;
+ packet->bitfields10.gds_size = qpd->gds_size;
+ packet->bitfields10.num_gws = qpd->num_gws;
+ packet->bitfields10.num_oac = qpd->num_oac;
+ packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+ packet->sh_mem_config = qpd->sh_mem_config;
+ packet->sh_mem_bases = qpd->sh_mem_bases;
+ packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
+ packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
+
+ packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;
+
+ packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+ packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+ return 0;
+}
+
+static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+ struct pm4_mes_runlist *packet;
+ int concurrent_proc_cnt = 0;
+ struct kfd_dev *kfd = pm->dqm->dev;
+
+ if (WARN_ON(!ib))
+ return -EFAULT;
+
+ /* Determine the number of processes to map together to HW:
+ * it can not exceed the number of VMIDs available to the
+ * scheduler, and it is determined by the smaller of the number
+ * of processes in the runlist and kfd module parameter
+ * hws_max_conc_proc.
+ * Note: the arbitration between the number of VMIDs and
+ * hws_max_conc_proc has been done in
+ * kgd2kfd_device_init().
+ */
+ concurrent_proc_cnt = min(pm->dqm->processes_count,
+ kfd->max_proc_per_quantum);
+
+ packet = (struct pm4_mes_runlist *)buffer;
+
+ memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+ packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+ sizeof(struct pm4_mes_runlist));
+
+ packet->bitfields4.ib_size = ib_size_in_dwords;
+ packet->bitfields4.chain = chain ? 1 : 0;
+ packet->bitfields4.offload_polling = 0;
+ packet->bitfields4.valid = 1;
+ packet->bitfields4.process_cnt = concurrent_proc_cnt;
+ packet->ordinal2 = lower_32_bits(ib);
+ packet->bitfields3.ib_base_hi = upper_32_bits(ib);
+
+ return 0;
+}
+
+int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res)
+{
+ struct pm4_mes_set_resources *packet;
+
+ packet = (struct pm4_mes_set_resources *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_set_resources));
+
+ packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
+ sizeof(struct pm4_mes_set_resources));
+
+ packet->bitfields2.queue_type =
+ queue_type__mes_set_resources__hsa_interface_queue_hiq;
+ packet->bitfields2.vmid_mask = res->vmid_mask;
+ packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+ packet->bitfields7.oac_mask = res->oac_mask;
+ packet->bitfields8.gds_heap_base = res->gds_heap_base;
+ packet->bitfields8.gds_heap_size = res->gds_heap_size;
+
+ packet->gws_mask_lo = lower_32_bits(res->gws_mask);
+ packet->gws_mask_hi = upper_32_bits(res->gws_mask);
+
+ packet->queue_mask_lo = lower_32_bits(res->queue_mask);
+ packet->queue_mask_hi = upper_32_bits(res->queue_mask);
+
+ return 0;
+}
+
+static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct queue *q, bool is_static)
+{
+ struct pm4_mes_map_queues *packet;
+ bool use_static = is_static;
+
+ packet = (struct pm4_mes_map_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+ sizeof(struct pm4_mes_map_queues));
+ packet->bitfields2.alloc_format =
+ alloc_format__mes_map_queues__one_per_pipe_vi;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_map_queues__compute_vi;
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_compute_vi;
+
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ if (use_static)
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_latency_static_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__debug_interface_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+ engine_sel__mes_map_queues__sdma0_vi;
+ use_static = false; /* no static queues under SDMA */
+ break;
+ default:
+ WARN(1, "queue type %d", q->properties.type);
+ return -EINVAL;
+ }
+ packet->bitfields3.doorbell_offset =
+ q->properties.doorbell_off;
+
+ packet->mqd_addr_lo =
+ lower_32_bits(q->gart_mqd_addr);
+
+ packet->mqd_addr_hi =
+ upper_32_bits(q->gart_mqd_addr);
+
+ packet->wptr_addr_lo =
+ lower_32_bits((uint64_t)q->properties.write_ptr);
+
+ packet->wptr_addr_hi =
+ upper_32_bits((uint64_t)q->properties.write_ptr);
+
+ return 0;
+}
+
+static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_queue_type type,
+ enum kfd_unmap_queues_filter filter,
+ uint32_t filter_param, bool reset,
+ unsigned int sdma_engine)
+{
+ struct pm4_mes_unmap_queues *packet;
+
+ packet = (struct pm4_mes_unmap_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+ sizeof(struct pm4_mes_unmap_queues));
+ switch (type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__compute;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+ break;
+ default:
+ WARN(1, "queue type %d", type);
+ return -EINVAL;
+ }
+
+ if (reset)
+ packet->bitfields2.action =
+ action__mes_unmap_queues__reset_queues;
+ else
+ packet->bitfields2.action =
+ action__mes_unmap_queues__preempt_queues;
+
+ switch (filter) {
+ case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields3b.doorbell_offset0 = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+ packet->bitfields3a.pasid = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_queues;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+ /* in this case, we do not preempt static queues */
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+ break;
+ default:
+ WARN(1, "filter %d", filter);
+ return -EINVAL;
+ }
+
+ return 0;
+
+}
+
+static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t fence_address, uint32_t fence_value)
+{
+ struct pm4_mes_query_status *packet;
+
+ packet = (struct pm4_mes_query_status *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+ packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+ sizeof(struct pm4_mes_query_status));
+
+ packet->bitfields2.context_id = 0;
+ packet->bitfields2.interrupt_sel =
+ interrupt_sel__mes_query_status__completion_status;
+ packet->bitfields2.command =
+ command__mes_query_status__fence_only_after_write_ack;
+
+ packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+ packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+ packet->data_hi = upper_32_bits((uint64_t)fence_value);
+ packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+ return 0;
+}
+
+static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer)
+{
+ struct pm4_mec_release_mem *packet;
+
+ packet = (struct pm4_mec_release_mem *)buffer;
+ memset(buffer, 0, sizeof(*packet));
+
+ packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
+ sizeof(*packet));
+
+ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+ packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+ packet->bitfields2.tcl1_action_ena = 1;
+ packet->bitfields2.tc_action_ena = 1;
+ packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+ packet->bitfields2.atc = 0;
+
+ packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+ packet->bitfields3.int_sel =
+ int_sel___release_mem__send_interrupt_after_write_confirm;
+
+ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+ packet->address_hi = upper_32_bits(gpu_addr);
+
+ packet->data_lo = 0;
+
+ return 0;
+}
+
+const struct packet_manager_funcs kfd_vi_pm_funcs = {
+ .map_process = pm_map_process_vi,
+ .runlist = pm_runlist_vi,
+ .set_resources = pm_set_resources_vi,
+ .map_queues = pm_map_queues_vi,
+ .unmap_queues = pm_unmap_queues_vi,
+ .query_status = pm_query_status_vi,
+ .release_mem = pm_release_mem_vi,
+ .map_process_size = sizeof(struct pm4_mes_map_process),
+ .runlist_size = sizeof(struct pm4_mes_runlist),
+ .set_resources_size = sizeof(struct pm4_mes_set_resources),
+ .map_queues_size = sizeof(struct pm4_mes_map_queues),
+ .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .query_status_size = sizeof(struct pm4_mes_query_status),
+ .release_mem_size = sizeof(struct pm4_mec_release_mem)
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index e0c07d24d251..76bf2dc8aec4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
.interrupt = kgd2kfd_interrupt,
.suspend = kgd2kfd_suspend,
.resume = kgd2kfd_resume,
+ .quiesce_mm = kgd2kfd_quiesce_mm,
+ .resume_mm = kgd2kfd_resume_mm,
.schedule_evict_and_restore_process =
kgd2kfd_schedule_evict_and_restore_process,
};
@@ -81,6 +83,11 @@ module_param(ignore_crat, int, 0444);
MODULE_PARM_DESC(ignore_crat,
"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
+int vega10_noretry;
+module_param_named(noretry, vega10_noretry, int, 0644);
+MODULE_PARM_DESC(noretry,
+ "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
+
static int amdkfd_init_completed;
int kgd2kfd_init(unsigned int interface_version,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index ee7061e1c466..4b8eb506642b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
case CHIP_POLARIS10:
case CHIP_POLARIS11:
return mqd_manager_init_vi_tonga(type, dev);
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ return mqd_manager_init_v9(type, dev);
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index c00c325ed3c9..06eaa218eba6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -79,10 +79,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
m->cp_mqd_base_addr_hi = upper_32_bits(addr);
- m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;
- /* Although WinKFD writes this, I suspect it should not be necessary */
- m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE;
-
m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
QUANTUM_DURATION(10);
@@ -412,7 +408,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL;
- mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+ mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
if (!mqd)
return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
new file mode 100644
index 000000000000..684054ff02cd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v9_structs.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ int retval;
+ uint64_t addr;
+ struct v9_mqd *m;
+ struct kfd_dev *kfd = mm->dev;
+
+ /* From V9, for CWSR, the control stack is located on the next page
+ * boundary after the mqd, we will use the gtt allocation function
+ * instead of sub-allocation function.
+ */
+ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
+ *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
+ if (!*mqd_mem_obj)
+ return -ENOMEM;
+ retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
+ ALIGN(q->ctl_stack_size, PAGE_SIZE) +
+ ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
+ &((*mqd_mem_obj)->gtt_mem),
+ &((*mqd_mem_obj)->gpu_addr),
+ (void *)&((*mqd_mem_obj)->cpu_ptr));
+ } else
+ retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
+ mqd_mem_obj);
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
+ addr = (*mqd_mem_obj)->gpu_addr;
+
+ memset(m, 0, sizeof(struct v9_mqd));
+
+ m->header = 0xC0310800;
+ m->compute_pipelinestat_enable = 1;
+ m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+ m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+ 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+ m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+ m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+ m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+ m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+ 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+ m->cp_hqd_pipe_priority = 1;
+ m->cp_hqd_queue_priority = 15;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_aql_control =
+ 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+ }
+
+ if (q->tba_addr) {
+ m->compute_pgm_rsrc2 |=
+ (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+ }
+
+ if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
+ m->cp_hqd_persistent_state |=
+ (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+ m->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_base_addr_hi =
+ upper_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+ m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+ m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+ m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+ }
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ (uint32_t __user *)p->write_ptr,
+ wptr_shift, 0, mms);
+}
+
+static int update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct v9_mqd *m;
+
+ m = get_mqd(mqd);
+
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
+ pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+ m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+ m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+ m->cp_hqd_pq_doorbell_control =
+ q->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ m->cp_hqd_ib_control =
+ 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
+ 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;
+
+ /*
+ * HW does not clamp this field correctly. Maximum EOP queue size
+ * is constrained by per-SE EOP done signal count, which is 8-bit.
+ * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+ * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+ * is safe, giving a maximum field value of 0xA.
+ */
+ m->cp_hqd_eop_control = min(0xA,
+ order_base_2(q->eop_ring_buffer_size / 4) - 1);
+ m->cp_hqd_eop_base_addr_lo =
+ lower_32_bits(q->eop_ring_buffer_address >> 8);
+ m->cp_hqd_eop_base_addr_hi =
+ upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+ m->cp_hqd_iq_timer = 0;
+
+ m->cp_hqd_vmid = q->vmid;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+ 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+ }
+ if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
+ m->cp_hqd_ctx_save_control = 0;
+
+ q->is_active = (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0 &&
+ !q->is_evicted);
+
+ return 0;
+}
+
+
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_destroy
+ (mm->dev->kgd, mqd, type, timeout,
+ pipe_id, queue_id);
+}
+
+static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ struct kfd_dev *kfd = mm->dev;
+
+ if (mqd_mem_obj->gtt_mem) {
+ kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
+ kfree(mqd_mem_obj);
+ } else {
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+ }
+}
+
+static bool is_occupied(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_is_occupied(
+ mm->dev->kgd, queue_address,
+ pipe_id, queue_id);
+}
+
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ struct v9_mqd *m;
+ int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+ if (retval != 0)
+ return retval;
+
+ m = get_mqd(*mqd);
+
+ m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+
+ return retval;
+}
+
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct v9_mqd *m;
+ int retval = update_mqd(mm, mqd, q);
+
+ if (retval != 0)
+ return retval;
+
+ /* TODO: what's the point? update_mqd already does this. */
+ m = get_mqd(mqd);
+ m->cp_hqd_vmid = q->vmid;
+ return retval;
+}
+
+static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ int retval;
+ struct v9_sdma_mqd *m;
+
+
+ retval = kfd_gtt_sa_allocate(mm->dev,
+ sizeof(struct v9_sdma_mqd),
+ mqd_mem_obj);
+
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
+
+ memset(m, 0, sizeof(struct v9_sdma_mqd));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = (*mqd_mem_obj)->gpu_addr;
+
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+ (uint32_t __user *)p->write_ptr,
+ mms);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct v9_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+ m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
+ << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_doorbell_offset =
+ q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdma_engine_id = q->sdma_engine_id;
+ m->sdma_queue_id = q->sdma_queue_id;
+ m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+ q->is_active = (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0 &&
+ !q->is_evicted);
+
+ return 0;
+}
+
+/*
+ * * preempt type here is ignored because there is only one way
+ * * to preempt sdma queue
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+}
+
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct v9_mqd), false);
+ return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct v9_sdma_mqd), false);
+ return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev)
+{
+ struct mqd_manager *mqd;
+
+ if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+ return NULL;
+
+ mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
+ if (!mqd)
+ return NULL;
+
+ mqd->dev = dev;
+
+ switch (type) {
+ case KFD_MQD_TYPE_CP:
+ case KFD_MQD_TYPE_COMPUTE:
+ mqd->init_mqd = init_mqd;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_HIQ:
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd_hiq;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_SDMA:
+ mqd->init_mqd = init_mqd_sdma;
+ mqd->uninit_mqd = uninit_mqd_sdma;
+ mqd->load_mqd = load_mqd_sdma;
+ mqd->update_mqd = update_mqd_sdma;
+ mqd->destroy_mqd = destroy_mqd_sdma;
+ mqd->is_occupied = is_occupied_sdma;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+ break;
+ default:
+ kfree(mqd);
+ return NULL;
+ }
+
+ return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 89e4242e43e7..481307b8b4db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL;
- mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+ mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
if (!mqd)
return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 89ba4c670ec5..c317feb43f69 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -26,8 +26,6 @@
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
-#include "kfd_pm4_headers_vi.h"
-#include "kfd_pm4_opcodes.h"
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
unsigned int buffer_size_bytes)
@@ -39,18 +37,6 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
*wptr = temp;
}
-static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
-{
- union PM4_MES_TYPE_3_HEADER header;
-
- header.u32All = 0;
- header.opcode = opcode;
- header.count = packet_size / 4 - 2;
- header.type = PM4_TYPE_3;
-
- return header.u32All;
-}
-
static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
bool *over_subscription)
@@ -80,9 +66,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
pr_debug("Over subscribed runlist\n");
}
- map_queue_size = sizeof(struct pm4_mes_map_queues);
+ map_queue_size = pm->pmf->map_queues_size;
/* calculate run list ib allocation size */
- *rlib_size = process_count * sizeof(struct pm4_mes_map_process) +
+ *rlib_size = process_count * pm->pmf->map_process_size +
queue_count * map_queue_size;
/*
@@ -90,7 +76,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
* when over subscription
*/
if (*over_subscription)
- *rlib_size += sizeof(struct pm4_mes_runlist);
+ *rlib_size += pm->pmf->runlist_size;
pr_debug("runlist ib size %d\n", *rlib_size);
}
@@ -108,12 +94,14 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+ mutex_lock(&pm->lock);
+
retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
&pm->ib_buffer_obj);
if (retval) {
pr_err("Failed to allocate runlist IB\n");
- return retval;
+ goto out;
}
*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
@@ -121,138 +109,10 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
memset(*rl_buffer, 0, *rl_buffer_size);
pm->allocated = true;
- return retval;
-}
-
-static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
- uint64_t ib, size_t ib_size_in_dwords, bool chain)
-{
- struct pm4_mes_runlist *packet;
- int concurrent_proc_cnt = 0;
- struct kfd_dev *kfd = pm->dqm->dev;
-
- if (WARN_ON(!ib))
- return -EFAULT;
-
- /* Determine the number of processes to map together to HW:
- * it can not exceed the number of VMIDs available to the
- * scheduler, and it is determined by the smaller of the number
- * of processes in the runlist and kfd module parameter
- * hws_max_conc_proc.
- * Note: the arbitration between the number of VMIDs and
- * hws_max_conc_proc has been done in
- * kgd2kfd_device_init().
- */
- concurrent_proc_cnt = min(pm->dqm->processes_count,
- kfd->max_proc_per_quantum);
-
- packet = (struct pm4_mes_runlist *)buffer;
-
- memset(buffer, 0, sizeof(struct pm4_mes_runlist));
- packet->header.u32All = build_pm4_header(IT_RUN_LIST,
- sizeof(struct pm4_mes_runlist));
-
- packet->bitfields4.ib_size = ib_size_in_dwords;
- packet->bitfields4.chain = chain ? 1 : 0;
- packet->bitfields4.offload_polling = 0;
- packet->bitfields4.valid = 1;
- packet->bitfields4.process_cnt = concurrent_proc_cnt;
- packet->ordinal2 = lower_32_bits(ib);
- packet->bitfields3.ib_base_hi = upper_32_bits(ib);
-
- return 0;
-}
-
-static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
- struct qcm_process_device *qpd)
-{
- struct pm4_mes_map_process *packet;
-
- packet = (struct pm4_mes_map_process *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
- packet->header.u32All = build_pm4_header(IT_MAP_PROCESS,
- sizeof(struct pm4_mes_map_process));
- packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
- packet->bitfields2.process_quantum = 1;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
- packet->bitfields3.page_table_base = qpd->page_table_base;
- packet->bitfields10.gds_size = qpd->gds_size;
- packet->bitfields10.num_gws = qpd->num_gws;
- packet->bitfields10.num_oac = qpd->num_oac;
- packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
-
- packet->sh_mem_config = qpd->sh_mem_config;
- packet->sh_mem_bases = qpd->sh_mem_bases;
- packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
- packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
-
- packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;
-
- packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
- packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
-
- return 0;
-}
-
-static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
- struct queue *q, bool is_static)
-{
- struct pm4_mes_map_queues *packet;
- bool use_static = is_static;
-
- packet = (struct pm4_mes_map_queues *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
-
- packet->header.u32All = build_pm4_header(IT_MAP_QUEUES,
- sizeof(struct pm4_mes_map_queues));
- packet->bitfields2.alloc_format =
- alloc_format__mes_map_queues__one_per_pipe_vi;
- packet->bitfields2.num_queues = 1;
- packet->bitfields2.queue_sel =
- queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
-
- packet->bitfields2.engine_sel =
- engine_sel__mes_map_queues__compute_vi;
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__normal_compute_vi;
-
- switch (q->properties.type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- if (use_static)
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__normal_latency_static_queue_vi;
- break;
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__debug_interface_queue_vi;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
- engine_sel__mes_map_queues__sdma0_vi;
- use_static = false; /* no static queues under SDMA */
- break;
- default:
- WARN(1, "queue type %d", q->properties.type);
- return -EINVAL;
- }
- packet->bitfields3.doorbell_offset =
- q->properties.doorbell_off;
-
- packet->mqd_addr_lo =
- lower_32_bits(q->gart_mqd_addr);
-
- packet->mqd_addr_hi =
- upper_32_bits(q->gart_mqd_addr);
-
- packet->wptr_addr_lo =
- lower_32_bits((uint64_t)q->properties.write_ptr);
-
- packet->wptr_addr_hi =
- upper_32_bits((uint64_t)q->properties.write_ptr);
-
- return 0;
+out:
+ mutex_unlock(&pm->lock);
+ return retval;
}
static int pm_create_runlist_ib(struct packet_manager *pm,
@@ -292,12 +152,12 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return -ENOMEM;
}
- retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
+ retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
if (retval)
return retval;
proccesses_mapped++;
- inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process),
+ inc_wptr(&rl_wptr, pm->pmf->map_process_size,
alloc_size_bytes);
list_for_each_entry(kq, &qpd->priv_queue_list, list) {
@@ -307,7 +167,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
- retval = pm_create_map_queue(pm,
+ retval = pm->pmf->map_queues(pm,
&rl_buffer[rl_wptr],
kq->queue,
qpd->is_debug);
@@ -315,7 +175,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
inc_wptr(&rl_wptr,
- sizeof(struct pm4_mes_map_queues),
+ pm->pmf->map_queues_size,
alloc_size_bytes);
}
@@ -326,7 +186,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
- retval = pm_create_map_queue(pm,
+ retval = pm->pmf->map_queues(pm,
&rl_buffer[rl_wptr],
q,
qpd->is_debug);
@@ -335,7 +195,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
inc_wptr(&rl_wptr,
- sizeof(struct pm4_mes_map_queues),
+ pm->pmf->map_queues_size,
alloc_size_bytes);
}
}
@@ -343,7 +203,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("Finished map process and queues to runlist\n");
if (is_over_subscription)
- retval = pm_create_runlist(pm, &rl_buffer[rl_wptr],
+ retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
*rl_gpu_addr,
alloc_size_bytes / sizeof(uint32_t),
true);
@@ -355,45 +215,29 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
}
-/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
- * of this packet
- * @gpu_addr - GPU address of the packet. It's a virtual address.
- * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
- * Return - length of the packet
- */
-uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
-{
- struct pm4_mec_release_mem *packet;
-
- WARN_ON(!buffer);
-
- packet = (struct pm4_mec_release_mem *)buffer;
- memset(buffer, 0, sizeof(*packet));
-
- packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
- sizeof(*packet));
-
- packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
- packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
- packet->bitfields2.tcl1_action_ena = 1;
- packet->bitfields2.tc_action_ena = 1;
- packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
- packet->bitfields2.atc = 0;
-
- packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
- packet->bitfields3.int_sel =
- int_sel___release_mem__send_interrupt_after_write_confirm;
-
- packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
- packet->address_hi = upper_32_bits(gpu_addr);
-
- packet->data_lo = 0;
-
- return sizeof(*packet) / sizeof(unsigned int);
-}
-
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
+ switch (dqm->dev->device_info->asic_family) {
+ case CHIP_KAVERI:
+ case CHIP_HAWAII:
+ /* PM4 packet structures on CIK are the same as on VI */
+ case CHIP_CARRIZO:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ pm->pmf = &kfd_vi_pm_funcs;
+ break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ pm->pmf = &kfd_v9_pm_funcs;
+ break;
+ default:
+ WARN(1, "Unexpected ASIC family %u",
+ dqm->dev->device_info->asic_family);
+ return -EINVAL;
+ }
+
pm->dqm = dqm;
mutex_init(&pm->lock);
pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
@@ -415,38 +259,25 @@ void pm_uninit(struct packet_manager *pm)
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res)
{
- struct pm4_mes_set_resources *packet;
+ uint32_t *buffer, size;
int retval = 0;
+ size = pm->pmf->set_resources_size;
mutex_lock(&pm->lock);
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
- sizeof(*packet) / sizeof(uint32_t),
- (unsigned int **)&packet);
- if (!packet) {
+ size / sizeof(uint32_t),
+ (unsigned int **)&buffer);
+ if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
- memset(packet, 0, sizeof(struct pm4_mes_set_resources));
- packet->header.u32All = build_pm4_header(IT_SET_RESOURCES,
- sizeof(struct pm4_mes_set_resources));
-
- packet->bitfields2.queue_type =
- queue_type__mes_set_resources__hsa_interface_queue_hiq;
- packet->bitfields2.vmid_mask = res->vmid_mask;
- packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
- packet->bitfields7.oac_mask = res->oac_mask;
- packet->bitfields8.gds_heap_base = res->gds_heap_base;
- packet->bitfields8.gds_heap_size = res->gds_heap_size;
-
- packet->gws_mask_lo = lower_32_bits(res->gws_mask);
- packet->gws_mask_hi = upper_32_bits(res->gws_mask);
-
- packet->queue_mask_lo = lower_32_bits(res->queue_mask);
- packet->queue_mask_hi = upper_32_bits(res->queue_mask);
-
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ retval = pm->pmf->set_resources(pm, buffer, res);
+ if (!retval)
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ else
+ pm->priv_queue->ops.rollback_packet(pm->priv_queue);
out:
mutex_unlock(&pm->lock);
@@ -468,7 +299,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
- packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t);
+ packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
mutex_lock(&pm->lock);
retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
@@ -476,7 +307,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
if (retval)
goto fail_acquire_packet_buffer;
- retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr,
+ retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
rl_ib_size / sizeof(uint32_t), false);
if (retval)
goto fail_create_runlist;
@@ -499,37 +330,29 @@ fail_create_runlist_ib:
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint32_t fence_value)
{
- int retval;
- struct pm4_mes_query_status *packet;
+ uint32_t *buffer, size;
+ int retval = 0;
if (WARN_ON(!fence_address))
return -EFAULT;
+ size = pm->pmf->query_status_size;
mutex_lock(&pm->lock);
- retval = pm->priv_queue->ops.acquire_packet_buffer(
- pm->priv_queue,
- sizeof(struct pm4_mes_query_status) / sizeof(uint32_t),
- (unsigned int **)&packet);
- if (retval)
- goto fail_acquire_packet_buffer;
-
- packet->header.u32All = build_pm4_header(IT_QUERY_STATUS,
- sizeof(struct pm4_mes_query_status));
-
- packet->bitfields2.context_id = 0;
- packet->bitfields2.interrupt_sel =
- interrupt_sel__mes_query_status__completion_status;
- packet->bitfields2.command =
- command__mes_query_status__fence_only_after_write_ack;
-
- packet->addr_hi = upper_32_bits((uint64_t)fence_address);
- packet->addr_lo = lower_32_bits((uint64_t)fence_address);
- packet->data_hi = upper_32_bits((uint64_t)fence_value);
- packet->data_lo = lower_32_bits((uint64_t)fence_value);
+ pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
+ if (!retval)
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ else
+ pm->priv_queue->ops.rollback_packet(pm->priv_queue);
-fail_acquire_packet_buffer:
+out:
mutex_unlock(&pm->lock);
return retval;
}
@@ -539,82 +362,27 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
uint32_t filter_param, bool reset,
unsigned int sdma_engine)
{
- int retval;
- uint32_t *buffer;
- struct pm4_mes_unmap_queues *packet;
+ uint32_t *buffer, size;
+ int retval = 0;
+ size = pm->pmf->unmap_queues_size;
mutex_lock(&pm->lock);
- retval = pm->priv_queue->ops.acquire_packet_buffer(
- pm->priv_queue,
- sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t),
- &buffer);
- if (retval)
- goto err_acquire_packet_buffer;
-
- packet = (struct pm4_mes_unmap_queues *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
- pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n",
- filter, reset, type);
- packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES,
- sizeof(struct pm4_mes_unmap_queues));
- switch (type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__compute;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
- break;
- default:
- WARN(1, "queue type %d", type);
- retval = -EINVAL;
- goto err_invalid;
+ pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
}
- if (reset)
- packet->bitfields2.action =
- action__mes_unmap_queues__reset_queues;
+ retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
+ reset, sdma_engine);
+ if (!retval)
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
else
- packet->bitfields2.action =
- action__mes_unmap_queues__preempt_queues;
-
- switch (filter) {
- case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
- packet->bitfields2.num_queues = 1;
- packet->bitfields3b.doorbell_offset0 = filter_param;
- break;
- case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
- packet->bitfields3a.pasid = filter_param;
- break;
- case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__unmap_all_queues;
- break;
- case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
- /* in this case, we do not preempt static queues */
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
- break;
- default:
- WARN(1, "filter %d", filter);
- retval = -EINVAL;
- goto err_invalid;
- }
+ pm->priv_queue->ops.rollback_packet(pm->priv_queue);
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
-
- mutex_unlock(&pm->lock);
- return 0;
-
-err_invalid:
- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
-err_acquire_packet_buffer:
+out:
mutex_unlock(&pm->lock);
return retval;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
new file mode 100644
index 000000000000..f2bcf5c092ea
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -0,0 +1,583 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef F32_MES_PM4_PACKETS_H
+#define F32_MES_PM4_PACKETS_H
+
+#ifndef PM4_MES_HEADER_DEFINED
+#define PM4_MES_HEADER_DEFINED
+union PM4_MES_TYPE_3_HEADER {
+ struct {
+ uint32_t reserved1 : 8; /* < reserved */
+ uint32_t opcode : 8; /* < IT opcode */
+ uint32_t count : 14;/* < number of DWORDs - 1 in the
+ * information body.
+ */
+ uint32_t type : 2; /* < packet identifier.
+ * It should be 3 for type 3 packets
+ */
+ };
+ uint32_t u32All;
+};
+#endif /* PM4_MES_HEADER_DEFINED */
+
+/*--------------------MES_SET_RESOURCES--------------------*/
+
+#ifndef PM4_MES_SET_RESOURCES_DEFINED
+#define PM4_MES_SET_RESOURCES_DEFINED
+enum mes_set_resources_queue_type_enum {
+ queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
+ queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
+ queue_type__mes_set_resources__hsa_debug_interface_queue = 4
+};
+
+
+struct pm4_mes_set_resources {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t vmid_mask:16;
+ uint32_t unmap_latency:8;
+ uint32_t reserved1:5;
+ enum mes_set_resources_queue_type_enum queue_type:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t queue_mask_lo;
+ uint32_t queue_mask_hi;
+ uint32_t gws_mask_lo;
+ uint32_t gws_mask_hi;
+
+ union {
+ struct {
+ uint32_t oac_mask:16;
+ uint32_t reserved2:16;
+ } bitfields7;
+ uint32_t ordinal7;
+ };
+
+ union {
+ struct {
+ uint32_t gds_heap_base:6;
+ uint32_t reserved3:5;
+ uint32_t gds_heap_size:6;
+ uint32_t reserved4:15;
+ } bitfields8;
+ uint32_t ordinal8;
+ };
+
+};
+#endif
+
+/*--------------------MES_RUN_LIST--------------------*/
+
+#ifndef PM4_MES_RUN_LIST_DEFINED
+#define PM4_MES_RUN_LIST_DEFINED
+
+struct pm4_mes_runlist {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t reserved1:2;
+ uint32_t ib_base_lo:30;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t ib_base_hi;
+
+ union {
+ struct {
+ uint32_t ib_size:20;
+ uint32_t chain:1;
+ uint32_t offload_polling:1;
+ uint32_t reserved2:1;
+ uint32_t valid:1;
+ uint32_t process_cnt:4;
+ uint32_t reserved3:4;
+ } bitfields4;
+ uint32_t ordinal4;
+ };
+
+};
+#endif
+
+/*--------------------MES_MAP_PROCESS--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_DEFINED
+#define PM4_MES_MAP_PROCESS_DEFINED
+
+struct pm4_mes_map_process {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved1:8;
+ uint32_t diq_enable:1;
+ uint32_t process_quantum:7;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t vm_context_page_table_base_addr_lo32;
+
+ uint32_t vm_context_page_table_base_addr_hi32;
+
+ uint32_t sh_mem_bases;
+
+ uint32_t sh_mem_config;
+
+ uint32_t sq_shader_tba_lo;
+
+ uint32_t sq_shader_tba_hi;
+
+ uint32_t sq_shader_tma_lo;
+
+ uint32_t sq_shader_tma_hi;
+
+ uint32_t reserved6;
+
+ uint32_t gds_addr_lo;
+
+ uint32_t gds_addr_hi;
+
+ union {
+ struct {
+ uint32_t num_gws:6;
+ uint32_t reserved7:1;
+ uint32_t sdma_enable:1;
+ uint32_t num_oac:4;
+ uint32_t reserved8:4;
+ uint32_t gds_size:6;
+ uint32_t num_queues:10;
+ } bitfields14;
+ uint32_t ordinal14;
+ };
+
+ uint32_t completion_signal_lo;
+
+ uint32_t completion_signal_hi;
+
+};
+
+#endif
+
+/*--------------------MES_MAP_PROCESS_VM--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_VM_DEFINED
+#define PM4_MES_MAP_PROCESS_VM_DEFINED
+
+struct PM4_MES_MAP_PROCESS_VM {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ uint32_t reserved1;
+
+ uint32_t vm_context_cntl;
+
+ uint32_t reserved2;
+
+ uint32_t vm_context_page_table_end_addr_lo32;
+
+ uint32_t vm_context_page_table_end_addr_hi32;
+
+ uint32_t vm_context_page_table_start_addr_lo32;
+
+ uint32_t vm_context_page_table_start_addr_hi32;
+
+ uint32_t reserved3;
+
+ uint32_t reserved4;
+
+ uint32_t reserved5;
+
+ uint32_t reserved6;
+
+ uint32_t reserved7;
+
+ uint32_t reserved8;
+
+ uint32_t completion_signal_lo32;
+
+ uint32_t completion_signal_hi32;
+
+};
+#endif
+
+/*--------------------MES_MAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
+#define PM4_MES_MAP_QUEUES_VI_DEFINED
+enum mes_map_queues_queue_sel_enum {
+ queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
+queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
+};
+
+enum mes_map_queues_queue_type_enum {
+ queue_type__mes_map_queues__normal_compute_vi = 0,
+ queue_type__mes_map_queues__debug_interface_queue_vi = 1,
+ queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
+queue_type__mes_map_queues__low_latency_static_queue_vi = 3
+};
+
+enum mes_map_queues_alloc_format_enum {
+ alloc_format__mes_map_queues__one_per_pipe_vi = 0,
+alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
+};
+
+enum mes_map_queues_engine_sel_enum {
+ engine_sel__mes_map_queues__compute_vi = 0,
+ engine_sel__mes_map_queues__sdma0_vi = 2,
+ engine_sel__mes_map_queues__sdma1_vi = 3
+};
+
+
+struct pm4_mes_map_queues {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t reserved1:4;
+ enum mes_map_queues_queue_sel_enum queue_sel:2;
+ uint32_t reserved2:15;
+ enum mes_map_queues_queue_type_enum queue_type:3;
+ enum mes_map_queues_alloc_format_enum alloc_format:2;
+ enum mes_map_queues_engine_sel_enum engine_sel:3;
+ uint32_t num_queues:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t reserved3:1;
+ uint32_t check_disable:1;
+ uint32_t doorbell_offset:26;
+ uint32_t reserved4:4;
+ } bitfields3;
+ uint32_t ordinal3;
+ };
+
+ uint32_t mqd_addr_lo;
+ uint32_t mqd_addr_hi;
+ uint32_t wptr_addr_lo;
+ uint32_t wptr_addr_hi;
+};
+#endif
+
+/*--------------------MES_QUERY_STATUS--------------------*/
+
+#ifndef PM4_MES_QUERY_STATUS_DEFINED
+#define PM4_MES_QUERY_STATUS_DEFINED
+enum mes_query_status_interrupt_sel_enum {
+ interrupt_sel__mes_query_status__completion_status = 0,
+ interrupt_sel__mes_query_status__process_status = 1,
+ interrupt_sel__mes_query_status__queue_status = 2
+};
+
+enum mes_query_status_command_enum {
+ command__mes_query_status__interrupt_only = 0,
+ command__mes_query_status__fence_only_immediate = 1,
+ command__mes_query_status__fence_only_after_write_ack = 2,
+ command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
+};
+
+enum mes_query_status_engine_sel_enum {
+ engine_sel__mes_query_status__compute = 0,
+ engine_sel__mes_query_status__sdma0_queue = 2,
+ engine_sel__mes_query_status__sdma1_queue = 3
+};
+
+struct pm4_mes_query_status {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t context_id:28;
+ enum mes_query_status_interrupt_sel_enum interrupt_sel:2;
+ enum mes_query_status_command_enum command:2;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved1:16;
+ } bitfields3a;
+ struct {
+ uint32_t reserved2:2;
+ uint32_t doorbell_offset:26;
+ enum mes_query_status_engine_sel_enum engine_sel:3;
+ uint32_t reserved3:1;
+ } bitfields3b;
+ uint32_t ordinal3;
+ };
+
+ uint32_t addr_lo;
+ uint32_t addr_hi;
+ uint32_t data_lo;
+ uint32_t data_hi;
+};
+#endif
+
+/*--------------------MES_UNMAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
+#define PM4_MES_UNMAP_QUEUES_DEFINED
+enum mes_unmap_queues_action_enum {
+ action__mes_unmap_queues__preempt_queues = 0,
+ action__mes_unmap_queues__reset_queues = 1,
+ action__mes_unmap_queues__disable_process_queues = 2,
+ action__mes_unmap_queues__reserved = 3
+};
+
+enum mes_unmap_queues_queue_sel_enum {
+ queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
+ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
+ queue_sel__mes_unmap_queues__unmap_all_queues = 2,
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
+};
+
+enum mes_unmap_queues_engine_sel_enum {
+ engine_sel__mes_unmap_queues__compute = 0,
+ engine_sel__mes_unmap_queues__sdma0 = 2,
+ engine_sel__mes_unmap_queues__sdmal = 3
+};
+
+struct pm4_mes_unmap_queues {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ enum mes_unmap_queues_action_enum action:2;
+ uint32_t reserved1:2;
+ enum mes_unmap_queues_queue_sel_enum queue_sel:2;
+ uint32_t reserved2:20;
+ enum mes_unmap_queues_engine_sel_enum engine_sel:3;
+ uint32_t num_queues:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved3:16;
+ } bitfields3a;
+ struct {
+ uint32_t reserved4:2;
+ uint32_t doorbell_offset0:26;
+ int32_t reserved5:4;
+ } bitfields3b;
+ uint32_t ordinal3;
+ };
+
+ union {
+ struct {
+ uint32_t reserved6:2;
+ uint32_t doorbell_offset1:26;
+ uint32_t reserved7:4;
+ } bitfields4;
+ uint32_t ordinal4;
+ };
+
+ union {
+ struct {
+ uint32_t reserved8:2;
+ uint32_t doorbell_offset2:26;
+ uint32_t reserved9:4;
+ } bitfields5;
+ uint32_t ordinal5;
+ };
+
+ union {
+ struct {
+ uint32_t reserved10:2;
+ uint32_t doorbell_offset3:26;
+ uint32_t reserved11:4;
+ } bitfields6;
+ uint32_t ordinal6;
+ };
+};
+#endif
+
+#ifndef PM4_MEC_RELEASE_MEM_DEFINED
+#define PM4_MEC_RELEASE_MEM_DEFINED
+
+enum mec_release_mem_event_index_enum {
+ event_index__mec_release_mem__end_of_pipe = 5,
+ event_index__mec_release_mem__shader_done = 6
+};
+
+enum mec_release_mem_cache_policy_enum {
+ cache_policy__mec_release_mem__lru = 0,
+ cache_policy__mec_release_mem__stream = 1
+};
+
+enum mec_release_mem_pq_exe_status_enum {
+ pq_exe_status__mec_release_mem__default = 0,
+ pq_exe_status__mec_release_mem__phase_update = 1
+};
+
+enum mec_release_mem_dst_sel_enum {
+ dst_sel__mec_release_mem__memory_controller = 0,
+ dst_sel__mec_release_mem__tc_l2 = 1,
+ dst_sel__mec_release_mem__queue_write_pointer_register = 2,
+ dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum mec_release_mem_int_sel_enum {
+ int_sel__mec_release_mem__none = 0,
+ int_sel__mec_release_mem__send_interrupt_only = 1,
+ int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2,
+ int_sel__mec_release_mem__send_data_after_write_confirm = 3,
+ int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4,
+ int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5,
+ int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6
+};
+
+enum mec_release_mem_data_sel_enum {
+ data_sel__mec_release_mem__none = 0,
+ data_sel__mec_release_mem__send_32_bit_low = 1,
+ data_sel__mec_release_mem__send_64_bit_data = 2,
+ data_sel__mec_release_mem__send_gpu_clock_counter = 3,
+ data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4,
+ data_sel__mec_release_mem__store_gds_data_to_memory = 5
+};
+
+struct pm4_mec_release_mem {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int event_type:6;
+ unsigned int reserved1:2;
+ enum mec_release_mem_event_index_enum event_index:4;
+ unsigned int tcl1_vol_action_ena:1;
+ unsigned int tc_vol_action_ena:1;
+ unsigned int reserved2:1;
+ unsigned int tc_wb_action_ena:1;
+ unsigned int tcl1_action_ena:1;
+ unsigned int tc_action_ena:1;
+ uint32_t reserved3:1;
+ uint32_t tc_nc_action_ena:1;
+ uint32_t tc_wc_action_ena:1;
+ uint32_t tc_md_action_ena:1;
+ uint32_t reserved4:3;
+ enum mec_release_mem_cache_policy_enum cache_policy:2;
+ uint32_t reserved5:2;
+ enum mec_release_mem_pq_exe_status_enum pq_exe_status:1;
+ uint32_t reserved6:2;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t reserved7:16;
+ enum mec_release_mem_dst_sel_enum dst_sel:2;
+ uint32_t reserved8:6;
+ enum mec_release_mem_int_sel_enum int_sel:3;
+ uint32_t reserved9:2;
+ enum mec_release_mem_data_sel_enum data_sel:3;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ union {
+ struct {
+ uint32_t reserved10:2;
+ unsigned int address_lo_32b:30;
+ } bitfields4;
+ struct {
+ uint32_t reserved11:3;
+ uint32_t address_lo_64b:29;
+ } bitfields4b;
+ uint32_t reserved12;
+ unsigned int ordinal4;
+ };
+
+ union {
+ uint32_t address_hi;
+ uint32_t reserved13;
+ uint32_t ordinal5;
+ };
+
+ union {
+ uint32_t data_lo;
+ uint32_t cmp_data_lo;
+ struct {
+ uint32_t dw_offset:16;
+ uint32_t num_dwords:16;
+ } bitfields6c;
+ uint32_t reserved14;
+ uint32_t ordinal6;
+ };
+
+ union {
+ uint32_t data_hi;
+ uint32_t cmp_data_hi;
+ uint32_t reserved15;
+ uint32_t reserved16;
+ uint32_t ordinal7;
+ };
+
+ uint32_t int_ctxid;
+
+};
+
+#endif
+
+enum {
+ CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
+};
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 96a9cc0f02c9..5e3990bb4c4b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -39,11 +39,37 @@
#include "amd_shared.h"
+#define KFD_MAX_RING_ENTRY_SIZE 8
+
#define KFD_SYSFS_FILE_MODE 0444
-#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull
-#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull
-#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull
+/* GPU ID hash width in bits */
+#define KFD_GPU_ID_HASH_WIDTH 16
+
+/* Use upper bits of mmap offset to store KFD driver specific information.
+ * BITS[63:62] - Encode MMAP type
+ * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to
+ * BITS[45:0] - MMAP offset value
+ *
+ * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
+ * defines are w.r.t to PAGE_SIZE
+ */
+#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
+
+#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
+ << KFD_MMAP_GPU_ID_SHIFT)
+#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
+ & KFD_MMAP_GPU_ID_MASK)
+#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
+ >> KFD_MMAP_GPU_ID_SHIFT)
+
+#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
+#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
/*
* When working with cp scheduler we should assign the HIQ manually or via
@@ -55,9 +81,6 @@
#define KFD_CIK_HIQ_PIPE 4
#define KFD_CIK_HIQ_QUEUE 0
-/* GPU ID hash width in bits */
-#define KFD_GPU_ID_HASH_WIDTH 16
-
/* Macro for allocating structures */
#define kfd_alloc_struct(ptr_to_struct) \
((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
@@ -116,6 +139,11 @@ extern int debug_largebar;
*/
extern int ignore_crat;
+/*
+ * Set sh_mem_config.retry_disable on Vega10
+ */
+extern int vega10_noretry;
+
/**
* enum kfd_sched_policy
*
@@ -148,6 +176,8 @@ enum cache_policy {
cache_policy_noncoherent
};
+#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
+
struct kfd_event_interrupt_class {
bool (*interrupt_isr)(struct kfd_dev *dev,
const uint32_t *ih_ring_entry);
@@ -160,6 +190,7 @@ struct kfd_device_info {
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
unsigned int max_no_of_hqd;
+ unsigned int doorbell_size;
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
@@ -173,6 +204,7 @@ struct kfd_mem_obj {
uint32_t range_end;
uint64_t gpu_addr;
uint32_t *cpu_ptr;
+ void *gtt_mem;
};
struct kfd_vmid_info {
@@ -364,7 +396,7 @@ struct queue_properties {
uint32_t queue_percent;
uint32_t *read_ptr;
uint32_t *write_ptr;
- uint32_t __iomem *doorbell_ptr;
+ void __iomem *doorbell_ptr;
uint32_t doorbell_off;
bool is_interop;
bool is_evicted;
@@ -427,6 +459,7 @@ struct queue {
uint32_t queue;
unsigned int sdma_id;
+ unsigned int doorbell_id;
struct kfd_process *process;
struct kfd_dev *device;
@@ -501,6 +534,9 @@ struct qcm_process_device {
/* IB memory */
uint64_t ib_base;
void *ib_kaddr;
+
+ /* doorbell resources per process per device */
+ unsigned long *doorbell_bitmap;
};
/* KFD Memory Eviction */
@@ -512,6 +548,8 @@ struct qcm_process_device {
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10
+int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);
@@ -681,6 +719,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
void kfd_unref_process(struct kfd_process *p);
+int kfd_process_evict_queues(struct kfd_process *p);
+int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
@@ -693,7 +733,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
-int kfd_reserved_mem_mmap(struct kfd_process *process,
+int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma);
/* KFD process API for creating and translating handles */
@@ -721,17 +761,20 @@ unsigned int kfd_pasid_alloc(void);
void kfd_pasid_free(unsigned int pasid);
/* Doorbells */
+size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
-int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
-u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
+ struct vm_area_struct *vma);
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
-void write_kernel_doorbell(u32 __iomem *db, u32 value);
-unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
+void write_kernel_doorbell(void __iomem *db, u32 value);
+void write_kernel_doorbell64(void __iomem *db, u64 value);
+unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
struct kfd_process *process,
- unsigned int queue_id);
+ unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process);
int kfd_alloc_process_doorbells(struct kfd_process *process);
@@ -788,6 +831,8 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -832,8 +877,42 @@ struct packet_manager {
bool allocated;
struct kfd_mem_obj *ib_buffer_obj;
unsigned int ib_size_bytes;
+
+ const struct packet_manager_funcs *pmf;
+};
+
+struct packet_manager_funcs {
+ /* Support ASIC-specific packet formats for PM4 packets */
+ int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
+ struct qcm_process_device *qpd);
+ int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t ib, size_t ib_size_in_dwords, bool chain);
+ int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res);
+ int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
+ struct queue *q, bool is_static);
+ int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_queue_type type,
+ enum kfd_unmap_queues_filter mode,
+ uint32_t filter_param, bool reset,
+ unsigned int sdma_engine);
+ int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t fence_address, uint32_t fence_value);
+ int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
+
+ /* Packet sizes */
+ int map_process_size;
+ int runlist_size;
+ int set_resources_size;
+ int map_queues_size;
+ int unmap_queues_size;
+ int query_status_size;
+ int release_mem_size;
};
+extern const struct packet_manager_funcs kfd_vi_pm_funcs;
+extern const struct packet_manager_funcs kfd_v9_pm_funcs;
+
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm);
int pm_send_set_resources(struct packet_manager *pm,
@@ -849,12 +928,17 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
void pm_release_ib(struct packet_manager *pm);
-uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
+/* Following PM funcs can be shared among VI and AI */
+unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
+int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res);
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
+
extern const struct kfd_device_global_init_class device_global_init_class_cik;
void kfd_event_init_process(struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1711ad0642f7..1d80b4f7c681 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -332,6 +332,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
get_order(KFD_CWSR_TBA_TMA_SIZE));
+ kfree(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
kfree(pdd);
@@ -451,7 +452,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
continue;
- offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
+ offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
+ << PAGE_SHIFT;
qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
MAP_SHARED, offset);
@@ -585,6 +587,31 @@ err_alloc_process:
return ERR_PTR(err);
}
+static int init_doorbell_bitmap(struct qcm_process_device *qpd,
+ struct kfd_dev *dev)
+{
+ unsigned int i;
+
+ if (!KFD_IS_SOC15(dev->device_info->asic_family))
+ return 0;
+
+ qpd->doorbell_bitmap =
+ kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ BITS_PER_BYTE), GFP_KERNEL);
+ if (!qpd->doorbell_bitmap)
+ return -ENOMEM;
+
+ /* Mask out any reserved doorbells */
+ for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
+ if ((dev->shared_resources.reserved_doorbell_mask & i) ==
+ dev->shared_resources.reserved_doorbell_val) {
+ set_bit(i, qpd->doorbell_bitmap);
+ pr_debug("reserved doorbell 0x%03x\n", i);
+ }
+
+ return 0;
+}
+
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process *p)
{
@@ -606,6 +633,12 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
if (!pdd)
return NULL;
+ if (init_doorbell_bitmap(&pdd->qpd, dev)) {
+ pr_err("Failed to init doorbell for process\n");
+ kfree(pdd);
+ return NULL;
+ }
+
pdd->dev = dev;
INIT_LIST_HEAD(&pdd->qpd.queues_list);
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
@@ -808,7 +841,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
* Eviction is reference-counted per process-device. This means multiple
* evictions from different sources can be nested safely.
*/
-static int process_evict_queues(struct kfd_process *p)
+int kfd_process_evict_queues(struct kfd_process *p)
{
struct kfd_process_device *pdd;
int r = 0;
@@ -844,7 +877,7 @@ fail:
}
/* process_restore_queues - Restore all user queues of a process */
-static int process_restore_queues(struct kfd_process *p)
+int kfd_process_restore_queues(struct kfd_process *p)
{
struct kfd_process_device *pdd;
int r, ret = 0;
@@ -886,7 +919,7 @@ static void evict_process_worker(struct work_struct *work)
flush_delayed_work(&p->restore_work);
pr_debug("Started evicting pasid %d\n", p->pasid);
- ret = process_evict_queues(p);
+ ret = kfd_process_evict_queues(p);
if (!ret) {
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
@@ -946,7 +979,7 @@ static void restore_process_worker(struct work_struct *work)
return;
}
- ret = process_restore_queues(p);
+ ret = kfd_process_restore_queues(p);
if (!ret)
pr_debug("Finished restoring pasid %d\n", p->pasid);
else
@@ -963,7 +996,7 @@ void kfd_suspend_all_processes(void)
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
- if (process_evict_queues(p))
+ if (kfd_process_evict_queues(p))
pr_err("Failed to suspend process %d\n", p->pasid);
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
@@ -989,15 +1022,12 @@ int kfd_resume_all_processes(void)
return ret;
}
-int kfd_reserved_mem_mmap(struct kfd_process *process,
+int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
- struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
struct kfd_process_device *pdd;
struct qcm_process_device *qpd;
- if (!dev)
- return -EINVAL;
if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
pr_err("Incorrect CWSR mapping size.\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 7817e327ea6d..d65ce0436b31 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -119,9 +119,6 @@ static int create_cp_queue(struct process_queue_manager *pqm,
/* Doorbell initialized in user space*/
q_properties->doorbell_ptr = NULL;
- q_properties->doorbell_off =
- kfd_queue_id_to_doorbell(dev, pqm->process, qid);
-
/* let DQM handle it*/
q_properties->vmid = 0;
q_properties->queue_id = qid;
@@ -244,10 +241,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("DQM create queue failed\n");
+ pr_err("Pasid %d DQM create queue %d failed. ret %d\n",
+ pqm->process->pasid, type, retval);
goto err_create_queue;
}
+ if (q)
+ /* Return the doorbell offset within the doorbell page
+ * to the caller so it can be passed up to user mode
+ * (in bytes).
+ */
+ properties->doorbell_off =
+ (q->properties.doorbell_off * sizeof(uint32_t)) &
+ (kfd_doorbell_process_slice(dev) - 1);
+
pr_debug("PQM After DQM create queue\n");
list_add(&pqn->process_queue_list, &pqm->queues);
@@ -313,8 +320,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
- pr_debug("Destroy queue failed, returned %d\n", retval);
- goto err_destroy_queue;
+ pr_err("Pasid %d destroy queue %d failed, ret %d\n",
+ pqm->process->pasid,
+ pqn->q->properties.queue_id, retval);
+ if (retval != -ETIME)
+ goto err_destroy_queue;
}
uninit_queue(pqn->q);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index a5315d4f1c95..6dcd621e5b71 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -36,8 +36,8 @@ void print_queue_properties(struct queue_properties *q)
pr_debug("Queue Address: 0x%llX\n", q->queue_address);
pr_debug("Queue Id: %u\n", q->queue_id);
pr_debug("Queue Process Vmid: %u\n", q->vmid);
- pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr);
- pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr);
+ pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr);
+ pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr);
pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr);
pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off);
}
@@ -53,8 +53,8 @@ void print_queue(struct queue *q)
pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address);
pr_debug("Queue Id: %u\n", q->properties.queue_id);
pr_debug("Queue Process Vmid: %u\n", q->properties.vmid);
- pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr);
- pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr);
+ pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr);
+ pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr);
pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr);
pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off);
pr_debug("Queue MQD Address: 0x%p\n", q->mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index ac28abc94e57..bc95d4dfee2e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1239,6 +1239,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+ break;
default:
WARN(1, "Unexpected ASIC family %u",
dev->gpu->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index eb54cfcaf039..7d9c3f948dff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -45,6 +45,7 @@
#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0
#define HSA_CAP_DOORBELL_TYPE_1_0 0x1
+#define HSA_CAP_DOORBELL_TYPE_2_0 0x2
#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000
struct kfd_node_properties {
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
new file mode 100644
index 000000000000..0bc0b25cb410
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HSA_SOC15_INT_H_INCLUDED
+#define HSA_SOC15_INT_H_INCLUDED
+
+#include "soc15_ih_clientid.h"
+
+#define SOC15_INTSRC_CP_END_OF_PIPE 181
+#define SOC15_INTSRC_CP_BAD_OPCODE 183
+#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239
+#define SOC15_INTSRC_VMC_FAULT 0
+#define SOC15_INTSRC_SDMA_TRAP 224
+
+
+#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
+#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff)
+#define SOC15_RING_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 16 & 0xff)
+#define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf)
+#define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1)
+#define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff)
+#define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4]))
+#define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5]))
+#define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6]))
+#define SOC15_CONTEXT_ID3_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[7]))
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 5b124a67404c..d5d4586e6176 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -9,14 +9,6 @@ config DRM_AMD_DC
support for AMDGPU. This adds required support for Vega and
Raven ASICs.
-config DRM_AMD_DC_PRE_VEGA
- bool "DC support for Polaris and older ASICs"
- default y
- help
- Choose this option to enable the new DC support for older asics
- by default. This includes Polaris, Carrizo, Tonga, Bonaire,
- and Hawaii.
-
config DRM_AMD_DC_FBC
bool "AMD FBC - Enable Frame Buffer Compression"
depends on DRM_AMD_DC
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 27579443cdc5..f9b9ab90558c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -433,11 +433,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
init_data.dce_environment = DCE_ENV_PRODUCTION_DRV;
- if (amdgpu_dc_log)
- init_data.log_mask = DC_DEFAULT_LOG_MASK;
- else
- init_data.log_mask = DC_MIN_LOG_MASK;
-
/*
* TODO debug why this doesn't work on Raven
*/
@@ -649,18 +644,6 @@ amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
static int dm_resume(void *handle)
{
struct amdgpu_device *adev = handle;
- struct amdgpu_display_manager *dm = &adev->dm;
- int ret = 0;
-
- /* power on hardware */
- dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
-
- ret = amdgpu_dm_display_resume(adev);
- return ret;
-}
-
-int amdgpu_dm_display_resume(struct amdgpu_device *adev)
-{
struct drm_device *ddev = adev->ddev;
struct amdgpu_display_manager *dm = &adev->dm;
struct amdgpu_dm_connector *aconnector;
@@ -671,10 +654,12 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev)
struct drm_plane *plane;
struct drm_plane_state *new_plane_state;
struct dm_plane_state *dm_new_plane_state;
-
- int ret = 0;
+ int ret;
int i;
+ /* power on hardware */
+ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
+
/* program HPD filter */
dc_resume(dm->dc);
@@ -688,8 +673,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev)
amdgpu_dm_irq_resume_early(adev);
/* Do detection*/
- list_for_each_entry(connector,
- &ddev->mode_config.connector_list, head) {
+ list_for_each_entry(connector, &ddev->mode_config.connector_list, head) {
aconnector = to_amdgpu_dm_connector(connector);
/*
@@ -711,7 +695,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev)
}
/* Force mode set in atomic comit */
- for_each_new_crtc_in_state(adev->dm.cached_state, crtc, new_crtc_state, i)
+ for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i)
new_crtc_state->active_changed = true;
/*
@@ -719,7 +703,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev)
* them here, since they were duplicated as part of the suspend
* procedure.
*/
- for_each_new_crtc_in_state(adev->dm.cached_state, crtc, new_crtc_state, i) {
+ for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) {
dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
if (dm_new_crtc_state->stream) {
WARN_ON(kref_read(&dm_new_crtc_state->stream->refcount) > 1);
@@ -728,7 +712,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev)
}
}
- for_each_new_plane_in_state(adev->dm.cached_state, plane, new_plane_state, i) {
+ for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) {
dm_new_plane_state = to_dm_plane_state(new_plane_state);
if (dm_new_plane_state->dc_state) {
WARN_ON(kref_read(&dm_new_plane_state->dc_state->refcount) > 1);
@@ -737,9 +721,9 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev)
}
}
- ret = drm_atomic_helper_resume(ddev, adev->dm.cached_state);
+ ret = drm_atomic_helper_resume(ddev, dm->cached_state);
- adev->dm.cached_state = NULL;
+ dm->cached_state = NULL;
amdgpu_dm_irq_resume_late(adev);
@@ -927,6 +911,7 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector)
drm_mode_connector_update_edid_property(connector, NULL);
aconnector->num_modes = 0;
aconnector->dc_sink = NULL;
+ aconnector->edid = NULL;
}
mutex_unlock(&dev->mode_config.mutex);
@@ -1131,6 +1116,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
if (adev->asic_type == CHIP_VEGA10 ||
adev->asic_type == CHIP_VEGA12 ||
+ adev->asic_type == CHIP_VEGA20 ||
adev->asic_type == CHIP_RAVEN)
client_id = SOC15_IH_CLIENTID_DCE;
@@ -1529,8 +1515,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case CHIP_POLARIS11:
case CHIP_POLARIS10:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
if (dce110_register_irq_handlers(dm->adev)) {
DRM_ERROR("DM: Failed to initialize IRQ\n");
goto fail;
@@ -1549,7 +1537,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
break;
#endif
default:
- DRM_ERROR("Usupported ASIC type: 0x%X\n", adev->asic_type);
+ DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
goto fail;
}
@@ -1657,7 +1645,6 @@ static ssize_t s3_debug_store(struct device *device,
if (ret == 0) {
if (s3_state) {
dm_resume(adev);
- amdgpu_dm_display_resume(adev);
drm_kms_helper_hotplug_event(adev->ddev);
} else
dm_suspend(adev);
@@ -1722,6 +1709,7 @@ static int dm_early_init(void *handle)
adev->mode_info.plane_type = dm_plane_type_default;
break;
case CHIP_POLARIS10:
+ case CHIP_VEGAM:
adev->mode_info.num_crtc = 6;
adev->mode_info.num_hpd = 6;
adev->mode_info.num_dig = 6;
@@ -1729,6 +1717,7 @@ static int dm_early_init(void *handle)
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
adev->mode_info.num_crtc = 6;
adev->mode_info.num_hpd = 6;
adev->mode_info.num_dig = 6;
@@ -1743,7 +1732,7 @@ static int dm_early_init(void *handle)
break;
#endif
default:
- DRM_ERROR("Usupported ASIC type: 0x%X\n", adev->asic_type);
+ DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
return -EINVAL;
}
@@ -1848,7 +1837,7 @@ static bool fill_rects_from_plane_state(const struct drm_plane_state *state,
static int get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
uint64_t *tiling_flags)
{
- struct amdgpu_bo *rbo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+ struct amdgpu_bo *rbo = gem_to_amdgpu_bo(amdgpu_fb->base.obj[0]);
int r = amdgpu_bo_reserve(rbo, false);
if (unlikely(r)) {
@@ -1977,6 +1966,7 @@ static int fill_plane_attributes_from_fb(struct amdgpu_device *adev,
if (adev->asic_type == CHIP_VEGA10 ||
adev->asic_type == CHIP_VEGA12 ||
+ adev->asic_type == CHIP_VEGA20 ||
adev->asic_type == CHIP_RAVEN) {
/* Fill GFX9 params */
plane_state->tiling_info.gfx9.num_pipes =
@@ -2017,7 +2007,6 @@ static int fill_plane_attributes(struct amdgpu_device *adev,
const struct amdgpu_framebuffer *amdgpu_fb =
to_amdgpu_framebuffer(plane_state->fb);
const struct drm_crtc *crtc = plane_state->crtc;
- struct dc_transfer_func *input_tf;
int ret = 0;
if (!fill_rects_from_plane_state(plane_state, dc_plane_state))
@@ -2031,13 +2020,6 @@ static int fill_plane_attributes(struct amdgpu_device *adev,
if (ret)
return ret;
- input_tf = dc_create_transfer_func();
-
- if (input_tf == NULL)
- return -ENOMEM;
-
- dc_plane_state->in_transfer_func = input_tf;
-
/*
* Always set input transfer function, since plane state is refreshed
* every time.
@@ -2206,7 +2188,6 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream,
const struct drm_connector *connector)
{
struct dc_crtc_timing *timing_out = &stream->timing;
- struct dc_transfer_func *tf = dc_create_transfer_func();
memset(timing_out, 0, sizeof(struct dc_crtc_timing));
@@ -2250,9 +2231,8 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream,
stream->output_color_space = get_output_color_space(timing_out);
- tf->type = TF_TYPE_PREDEFINED;
- tf->tf = TRANSFER_FUNCTION_SRGB;
- stream->out_transfer_func = tf;
+ stream->out_transfer_func->type = TF_TYPE_PREDEFINED;
+ stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
}
static void fill_audio_info(struct audio_info *audio_info,
@@ -2488,6 +2468,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
update_stream_signal(stream);
+ if (dm_state && dm_state->freesync_capable)
+ stream->ignore_msa_timing_param = true;
+
return stream;
}
@@ -2710,18 +2693,15 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
const struct dc_link *link = aconnector->dc_link;
struct amdgpu_device *adev = connector->dev->dev_private;
struct amdgpu_display_manager *dm = &adev->dm;
+
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if ((link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) &&
- link->type != dc_connection_none) {
- amdgpu_dm_register_backlight_device(dm);
-
- if (dm->backlight_dev) {
- backlight_device_unregister(dm->backlight_dev);
- dm->backlight_dev = NULL;
- }
-
+ link->type != dc_connection_none &&
+ dm->backlight_dev) {
+ backlight_device_unregister(dm->backlight_dev);
+ dm->backlight_dev = NULL;
}
#endif
drm_connector_unregister(connector);
@@ -2855,7 +2835,7 @@ static void handle_edid_mgmt(struct amdgpu_dm_connector *aconnector)
create_eml_sink(aconnector);
}
-int amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
+enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
int result = MODE_ERROR;
@@ -3058,8 +3038,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
}
afb = to_amdgpu_framebuffer(new_state->fb);
-
- obj = afb->obj;
+ obj = new_state->fb->obj[0];
rbo = gem_to_amdgpu_bo(obj);
adev = amdgpu_ttm_adev(rbo->tbo.bdev);
r = amdgpu_bo_reserve(rbo, false);
@@ -3067,12 +3046,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
return r;
if (plane->type != DRM_PLANE_TYPE_CURSOR)
- domain = amdgpu_display_framebuffer_domains(adev);
+ domain = amdgpu_display_supported_domains(adev);
else
domain = AMDGPU_GEM_DOMAIN_VRAM;
r = amdgpu_bo_pin(rbo, domain, &afb->address);
-
amdgpu_bo_unreserve(rbo);
if (unlikely(r != 0)) {
@@ -3123,14 +3101,12 @@ static void dm_plane_helper_cleanup_fb(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
struct amdgpu_bo *rbo;
- struct amdgpu_framebuffer *afb;
int r;
if (!old_state->fb)
return;
- afb = to_amdgpu_framebuffer(old_state->fb);
- rbo = gem_to_amdgpu_bo(afb->obj);
+ rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]);
r = amdgpu_bo_reserve(rbo, false);
if (unlikely(r)) {
DRM_ERROR("failed to reserve rbo before unpin\n");
@@ -3773,7 +3749,7 @@ static void remove_stream(struct amdgpu_device *adev,
static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
struct dc_cursor_position *position)
{
- struct amdgpu_crtc *amdgpu_crtc = amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
int x, y;
int xorigin = 0, yorigin = 0;
@@ -3905,7 +3881,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
int r, vpos, hpos;
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(fb);
- struct amdgpu_bo *abo = gem_to_amdgpu_bo(afb->obj);
+ struct amdgpu_bo *abo = gem_to_amdgpu_bo(fb->obj[0]);
struct amdgpu_device *adev = crtc->dev->dev_private;
bool async_flip = (crtc->state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
struct dc_flip_addrs addr = { {0} };
@@ -3986,6 +3962,96 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
}
+/*
+ * TODO this whole function needs to go
+ *
+ * dc_surface_update is needlessly complex. See if we can just replace this
+ * with a dc_plane_state and follow the atomic model a bit more closely here.
+ */
+static bool commit_planes_to_stream(
+ struct dc *dc,
+ struct dc_plane_state **plane_states,
+ uint8_t new_plane_count,
+ struct dm_crtc_state *dm_new_crtc_state,
+ struct dm_crtc_state *dm_old_crtc_state,
+ struct dc_state *state)
+{
+ /* no need to dynamically allocate this. it's pretty small */
+ struct dc_surface_update updates[MAX_SURFACES];
+ struct dc_flip_addrs *flip_addr;
+ struct dc_plane_info *plane_info;
+ struct dc_scaling_info *scaling_info;
+ int i;
+ struct dc_stream_state *dc_stream = dm_new_crtc_state->stream;
+ struct dc_stream_update *stream_update =
+ kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL);
+
+ if (!stream_update) {
+ BREAK_TO_DEBUGGER();
+ return false;
+ }
+
+ flip_addr = kcalloc(MAX_SURFACES, sizeof(struct dc_flip_addrs),
+ GFP_KERNEL);
+ plane_info = kcalloc(MAX_SURFACES, sizeof(struct dc_plane_info),
+ GFP_KERNEL);
+ scaling_info = kcalloc(MAX_SURFACES, sizeof(struct dc_scaling_info),
+ GFP_KERNEL);
+
+ if (!flip_addr || !plane_info || !scaling_info) {
+ kfree(flip_addr);
+ kfree(plane_info);
+ kfree(scaling_info);
+ kfree(stream_update);
+ return false;
+ }
+
+ memset(updates, 0, sizeof(updates));
+
+ stream_update->src = dc_stream->src;
+ stream_update->dst = dc_stream->dst;
+ stream_update->out_transfer_func = dc_stream->out_transfer_func;
+
+ for (i = 0; i < new_plane_count; i++) {
+ updates[i].surface = plane_states[i];
+ updates[i].gamma =
+ (struct dc_gamma *)plane_states[i]->gamma_correction;
+ updates[i].in_transfer_func = plane_states[i]->in_transfer_func;
+ flip_addr[i].address = plane_states[i]->address;
+ flip_addr[i].flip_immediate = plane_states[i]->flip_immediate;
+ plane_info[i].color_space = plane_states[i]->color_space;
+ plane_info[i].format = plane_states[i]->format;
+ plane_info[i].plane_size = plane_states[i]->plane_size;
+ plane_info[i].rotation = plane_states[i]->rotation;
+ plane_info[i].horizontal_mirror = plane_states[i]->horizontal_mirror;
+ plane_info[i].stereo_format = plane_states[i]->stereo_format;
+ plane_info[i].tiling_info = plane_states[i]->tiling_info;
+ plane_info[i].visible = plane_states[i]->visible;
+ plane_info[i].per_pixel_alpha = plane_states[i]->per_pixel_alpha;
+ plane_info[i].dcc = plane_states[i]->dcc;
+ scaling_info[i].scaling_quality = plane_states[i]->scaling_quality;
+ scaling_info[i].src_rect = plane_states[i]->src_rect;
+ scaling_info[i].dst_rect = plane_states[i]->dst_rect;
+ scaling_info[i].clip_rect = plane_states[i]->clip_rect;
+
+ updates[i].flip_addr = &flip_addr[i];
+ updates[i].plane_info = &plane_info[i];
+ updates[i].scaling_info = &scaling_info[i];
+ }
+
+ dc_commit_updates_for_stream(
+ dc,
+ updates,
+ new_plane_count,
+ dc_stream, stream_update, plane_states, state);
+
+ kfree(flip_addr);
+ kfree(plane_info);
+ kfree(scaling_info);
+ kfree(stream_update);
+ return true;
+}
+
static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
struct drm_device *dev,
struct amdgpu_display_manager *dm,
@@ -4001,6 +4067,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
struct drm_crtc_state *new_pcrtc_state =
drm_atomic_get_new_crtc_state(state, pcrtc);
struct dm_crtc_state *acrtc_state = to_dm_crtc_state(new_pcrtc_state);
+ struct dm_crtc_state *dm_old_crtc_state =
+ to_dm_crtc_state(drm_atomic_get_old_crtc_state(state, pcrtc));
struct dm_atomic_state *dm_state = to_dm_atomic_state(state);
int planes_count = 0;
unsigned long flags;
@@ -4037,7 +4105,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
}
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
- if (!pflip_needed) {
+ if (!pflip_needed || plane->type == DRM_PLANE_TYPE_OVERLAY) {
WARN_ON(!dm_new_plane_state->dc_state);
plane_states_constructed[planes_count] = dm_new_plane_state->dc_state;
@@ -4079,10 +4147,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags);
}
- if (false == dc_commit_planes_to_stream(dm->dc,
+
+ if (false == commit_planes_to_stream(dm->dc,
plane_states_constructed,
planes_count,
- dc_stream_attach,
+ acrtc_state,
+ dm_old_crtc_state,
dm_state->context))
dm_error("%s: Failed to attach plane!\n", __func__);
} else {
@@ -4307,8 +4377,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
struct dc_stream_status *status = NULL;
- if (acrtc)
+ if (acrtc) {
new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
+ old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
+ }
/* Skip any modesets/resets */
if (!acrtc || drm_atomic_crtc_needs_modeset(new_crtc_state))
@@ -4331,11 +4403,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
WARN_ON(!status->plane_count);
/*TODO How it works with MPO ?*/
- if (!dc_commit_planes_to_stream(
+ if (!commit_planes_to_stream(
dm->dc,
status->plane_states,
status->plane_count,
- dm_new_crtc_state->stream,
+ dm_new_crtc_state,
+ to_dm_crtc_state(old_crtc_state),
dm_state->context))
dm_error("%s: Failed to update stream scaling!\n", __func__);
}
@@ -4582,7 +4655,6 @@ static int dm_update_crtcs_state(struct dc *dc,
drm_old_conn_state = drm_atomic_get_old_connector_state(state,
&aconnector->base);
-
if (IS_ERR(drm_new_conn_state)) {
ret = PTR_ERR_OR_ZERO(drm_new_conn_state);
break;
@@ -4769,7 +4841,8 @@ static int dm_update_planes_state(struct dc *dc,
/* Remove any changed/removed planes */
if (!enable) {
- if (pflip_needed)
+ if (pflip_needed &&
+ plane->type != DRM_PLANE_TYPE_OVERLAY)
continue;
if (!old_plane_crtc)
@@ -4816,7 +4889,8 @@ static int dm_update_planes_state(struct dc *dc,
if (!dm_new_crtc_state->stream)
continue;
- if (pflip_needed)
+ if (pflip_needed &&
+ plane->type != DRM_PLANE_TYPE_OVERLAY)
continue;
WARN_ON(dm_new_plane_state->dc_state);
@@ -5023,17 +5097,24 @@ void amdgpu_dm_add_sink_to_freesync_module(struct drm_connector *connector,
struct edid *edid)
{
int i;
- uint64_t val_capable;
bool edid_check_required;
struct detailed_timing *timing;
struct detailed_non_pixel *data;
struct detailed_data_monitor_range *range;
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
+ struct dm_connector_state *dm_con_state;
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = dev->dev_private;
+ if (!connector->state) {
+ DRM_ERROR("%s - Connector has no state", __func__);
+ return;
+ }
+
+ dm_con_state = to_dm_connector_state(connector->state);
+
edid_check_required = false;
if (!amdgpu_dm_connector->dc_sink) {
DRM_ERROR("dc_sink NULL, could not add free_sync module.\n");
@@ -5052,7 +5133,7 @@ void amdgpu_dm_add_sink_to_freesync_module(struct drm_connector *connector,
amdgpu_dm_connector);
}
}
- val_capable = 0;
+ dm_con_state->freesync_capable = false;
if (edid_check_required == true && (edid->version > 1 ||
(edid->version == 1 && edid->revision > 1))) {
for (i = 0; i < 4; i++) {
@@ -5088,7 +5169,7 @@ void amdgpu_dm_add_sink_to_freesync_module(struct drm_connector *connector,
amdgpu_dm_connector->min_vfreq * 1000000;
amdgpu_dm_connector->caps.max_refresh_in_micro_hz =
amdgpu_dm_connector->max_vfreq * 1000000;
- val_capable = 1;
+ dm_con_state->freesync_capable = true;
}
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index b68400c1154b..d5aa89ad5571 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -28,7 +28,6 @@
#include <drm/drmP.h>
#include <drm/drm_atomic.h>
-#include "dc.h"
/*
* This file contains the definition for amdgpu_display_manager
@@ -53,6 +52,7 @@
struct amdgpu_device;
struct drm_device;
struct amdgpu_dm_irq_handler_data;
+struct dc;
struct amdgpu_dm_prev_state {
struct drm_framebuffer *fb;
@@ -220,6 +220,7 @@ struct dm_connector_state {
uint8_t underscan_hborder;
bool underscan_enable;
struct mod_freesync_user_enable user_enable;
+ bool freesync_capable;
};
#define to_dm_connector_state(x)\
@@ -246,7 +247,7 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
struct dc_link *link,
int link_index);
-int amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
+enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode);
void dm_restore_drm_connector_state(struct drm_device *dev,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 25f064c01038..b329393307e5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -25,6 +25,7 @@
#include "amdgpu_mode.h"
#include "amdgpu_dm.h"
+#include "dc.h"
#include "modules/color/color_gamma.h"
#define MAX_DRM_LUT_VALUE 0xFFFF
@@ -87,9 +88,9 @@ static void __drm_lut_to_dc_gamma(struct drm_color_lut *lut,
g = drm_color_lut_extract(lut[i].green, 16);
b = drm_color_lut_extract(lut[i].blue, 16);
- gamma->entries.red[i] = dal_fixed31_32_from_int(r);
- gamma->entries.green[i] = dal_fixed31_32_from_int(g);
- gamma->entries.blue[i] = dal_fixed31_32_from_int(b);
+ gamma->entries.red[i] = dc_fixpt_from_int(r);
+ gamma->entries.green[i] = dc_fixpt_from_int(g);
+ gamma->entries.blue[i] = dc_fixpt_from_int(b);
}
return;
}
@@ -100,9 +101,9 @@ static void __drm_lut_to_dc_gamma(struct drm_color_lut *lut,
g = drm_color_lut_extract(lut[i].green, 16);
b = drm_color_lut_extract(lut[i].blue, 16);
- gamma->entries.red[i] = dal_fixed31_32_from_fraction(r, MAX_DRM_LUT_VALUE);
- gamma->entries.green[i] = dal_fixed31_32_from_fraction(g, MAX_DRM_LUT_VALUE);
- gamma->entries.blue[i] = dal_fixed31_32_from_fraction(b, MAX_DRM_LUT_VALUE);
+ gamma->entries.red[i] = dc_fixpt_from_fraction(r, MAX_DRM_LUT_VALUE);
+ gamma->entries.green[i] = dc_fixpt_from_fraction(g, MAX_DRM_LUT_VALUE);
+ gamma->entries.blue[i] = dc_fixpt_from_fraction(b, MAX_DRM_LUT_VALUE);
}
}
@@ -207,7 +208,7 @@ void amdgpu_dm_set_ctm(struct dm_crtc_state *crtc)
for (i = 0; i < 12; i++) {
/* Skip 4th element */
if (i % 4 == 3) {
- stream->gamut_remap_matrix.matrix[i] = dal_fixed31_32_zero;
+ stream->gamut_remap_matrix.matrix[i] = dc_fixpt_zero;
continue;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index ca0b08bfa2cf..bd449351803f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -330,11 +330,6 @@ bool dm_helpers_dp_mst_send_payload_allocation(
return true;
}
-bool dm_helpers_dc_conn_log(struct dc_context *ctx, struct log_entry *entry, enum dc_log_type event)
-{
- return true;
-}
-
void dm_dtn_log_begin(struct dc_context *ctx)
{}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
index 89342b48be6b..0229c7edb8ad 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
@@ -37,8 +37,17 @@
unsigned long long dm_get_timestamp(struct dc_context *ctx)
{
- /* TODO: return actual timestamp */
- return 0;
+ struct timespec64 time;
+
+ getrawmonotonic64(&time);
+ return timespec64_to_ns(&time);
+}
+
+unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx,
+ unsigned long long current_time_stamp,
+ unsigned long long last_time_stamp)
+{
+ return current_time_stamp - last_time_stamp;
}
void dm_perf_trace_timestamp(const char *func_name, unsigned int line)
diff --git a/drivers/gpu/drm/amd/display/dc/basics/Makefile b/drivers/gpu/drm/amd/display/dc/basics/Makefile
index bca33bd9a0d2..b49ea96b5dae 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/basics/Makefile
@@ -24,7 +24,7 @@
# It provides the general basic services required by other DAL
# subcomponents.
-BASICS = conversion.o fixpt31_32.o fixpt32_32.o \
+BASICS = conversion.o fixpt31_32.o \
logger.o log_helpers.o vector.o
AMD_DAL_BASICS = $(addprefix $(AMDDALPATH)/dc/basics/,$(BASICS))
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
index 310964915a83..50b47f11875c 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
@@ -41,22 +41,22 @@ uint16_t fixed_point_to_int_frac(
uint16_t result;
- uint16_t d = (uint16_t)dal_fixed31_32_floor(
- dal_fixed31_32_abs(
+ uint16_t d = (uint16_t)dc_fixpt_floor(
+ dc_fixpt_abs(
arg));
if (d <= (uint16_t)(1 << integer_bits) - (1 / (uint16_t)divisor))
- numerator = (uint16_t)dal_fixed31_32_round(
- dal_fixed31_32_mul_int(
+ numerator = (uint16_t)dc_fixpt_round(
+ dc_fixpt_mul_int(
arg,
divisor));
else {
- numerator = dal_fixed31_32_floor(
- dal_fixed31_32_sub(
- dal_fixed31_32_from_int(
+ numerator = dc_fixpt_floor(
+ dc_fixpt_sub(
+ dc_fixpt_from_int(
1LL << integer_bits),
- dal_fixed31_32_recip(
- dal_fixed31_32_from_int(
+ dc_fixpt_recip(
+ dc_fixpt_from_int(
divisor))));
}
@@ -66,8 +66,8 @@ uint16_t fixed_point_to_int_frac(
result = (uint16_t)(
(1 << (integer_bits + fractional_bits + 1)) + numerator);
- if ((result != 0) && dal_fixed31_32_lt(
- arg, dal_fixed31_32_zero))
+ if ((result != 0) && dc_fixpt_lt(
+ arg, dc_fixpt_zero))
result |= 1 << (integer_bits + fractional_bits);
return result;
@@ -84,15 +84,15 @@ void convert_float_matrix(
uint32_t buffer_size)
{
const struct fixed31_32 min_2_13 =
- dal_fixed31_32_from_fraction(S2D13_MIN, DIVIDER);
+ dc_fixpt_from_fraction(S2D13_MIN, DIVIDER);
const struct fixed31_32 max_2_13 =
- dal_fixed31_32_from_fraction(S2D13_MAX, DIVIDER);
+ dc_fixpt_from_fraction(S2D13_MAX, DIVIDER);
uint32_t i;
for (i = 0; i < buffer_size; ++i) {
uint32_t reg_value =
fixed_point_to_int_frac(
- dal_fixed31_32_clamp(
+ dc_fixpt_clamp(
flt[i],
min_2_13,
max_2_13),
diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
index 8a9bba879207..e61dd97d0928 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
@@ -26,13 +26,13 @@
#include "dm_services.h"
#include "include/fixed31_32.h"
-static inline uint64_t abs_i64(
- int64_t arg)
+static inline unsigned long long abs_i64(
+ long long arg)
{
if (arg > 0)
- return (uint64_t)arg;
+ return (unsigned long long)arg;
else
- return (uint64_t)(-arg);
+ return (unsigned long long)(-arg);
}
/*
@@ -40,12 +40,12 @@ static inline uint64_t abs_i64(
* result = dividend / divisor
* *remainder = dividend % divisor
*/
-static inline uint64_t complete_integer_division_u64(
- uint64_t dividend,
- uint64_t divisor,
- uint64_t *remainder)
+static inline unsigned long long complete_integer_division_u64(
+ unsigned long long dividend,
+ unsigned long long divisor,
+ unsigned long long *remainder)
{
- uint64_t result;
+ unsigned long long result;
ASSERT(divisor);
@@ -64,30 +64,28 @@ static inline uint64_t complete_integer_division_u64(
#define GET_FRACTIONAL_PART(x) \
(FRACTIONAL_PART_MASK & (x))
-struct fixed31_32 dal_fixed31_32_from_fraction(
- int64_t numerator,
- int64_t denominator)
+struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator)
{
struct fixed31_32 res;
bool arg1_negative = numerator < 0;
bool arg2_negative = denominator < 0;
- uint64_t arg1_value = arg1_negative ? -numerator : numerator;
- uint64_t arg2_value = arg2_negative ? -denominator : denominator;
+ unsigned long long arg1_value = arg1_negative ? -numerator : numerator;
+ unsigned long long arg2_value = arg2_negative ? -denominator : denominator;
- uint64_t remainder;
+ unsigned long long remainder;
/* determine integer part */
- uint64_t res_value = complete_integer_division_u64(
+ unsigned long long res_value = complete_integer_division_u64(
arg1_value, arg2_value, &remainder);
ASSERT(res_value <= LONG_MAX);
/* determine fractional part */
{
- uint32_t i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
+ unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
do {
remainder <<= 1;
@@ -103,14 +101,14 @@ struct fixed31_32 dal_fixed31_32_from_fraction(
/* round up LSB */
{
- uint64_t summand = (remainder << 1) >= arg2_value;
+ unsigned long long summand = (remainder << 1) >= arg2_value;
ASSERT(res_value <= LLONG_MAX - summand);
res_value += summand;
}
- res.value = (int64_t)res_value;
+ res.value = (long long)res_value;
if (arg1_negative ^ arg2_negative)
res.value = -res.value;
@@ -118,79 +116,23 @@ struct fixed31_32 dal_fixed31_32_from_fraction(
return res;
}
-struct fixed31_32 dal_fixed31_32_from_int_nonconst(
- int64_t arg)
-{
- struct fixed31_32 res;
-
- ASSERT((LONG_MIN <= arg) && (arg <= LONG_MAX));
-
- res.value = arg << FIXED31_32_BITS_PER_FRACTIONAL_PART;
-
- return res;
-}
-
-struct fixed31_32 dal_fixed31_32_shl(
- struct fixed31_32 arg,
- uint8_t shift)
-{
- struct fixed31_32 res;
-
- ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) ||
- ((arg.value < 0) && (arg.value >= LLONG_MIN >> shift)));
-
- res.value = arg.value << shift;
-
- return res;
-}
-
-struct fixed31_32 dal_fixed31_32_add(
- struct fixed31_32 arg1,
- struct fixed31_32 arg2)
-{
- struct fixed31_32 res;
-
- ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) ||
- ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value)));
-
- res.value = arg1.value + arg2.value;
-
- return res;
-}
-
-struct fixed31_32 dal_fixed31_32_sub(
- struct fixed31_32 arg1,
- struct fixed31_32 arg2)
-{
- struct fixed31_32 res;
-
- ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) ||
- ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value)));
-
- res.value = arg1.value - arg2.value;
-
- return res;
-}
-
-struct fixed31_32 dal_fixed31_32_mul(
- struct fixed31_32 arg1,
- struct fixed31_32 arg2)
+struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2)
{
struct fixed31_32 res;
bool arg1_negative = arg1.value < 0;
bool arg2_negative = arg2.value < 0;
- uint64_t arg1_value = arg1_negative ? -arg1.value : arg1.value;
- uint64_t arg2_value = arg2_negative ? -arg2.value : arg2.value;
+ unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value;
+ unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value;
- uint64_t arg1_int = GET_INTEGER_PART(arg1_value);
- uint64_t arg2_int = GET_INTEGER_PART(arg2_value);
+ unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
+ unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);
- uint64_t arg1_fra = GET_FRACTIONAL_PART(arg1_value);
- uint64_t arg2_fra = GET_FRACTIONAL_PART(arg2_value);
+ unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
+ unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);
- uint64_t tmp;
+ unsigned long long tmp;
res.value = arg1_int * arg2_int;
@@ -200,22 +142,22 @@ struct fixed31_32 dal_fixed31_32_mul(
tmp = arg1_int * arg2_fra;
- ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+ ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
res.value += tmp;
tmp = arg2_int * arg1_fra;
- ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+ ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
res.value += tmp;
tmp = arg1_fra * arg2_fra;
tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
- (tmp >= (uint64_t)dal_fixed31_32_half.value);
+ (tmp >= (unsigned long long)dc_fixpt_half.value);
- ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+ ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
res.value += tmp;
@@ -225,18 +167,17 @@ struct fixed31_32 dal_fixed31_32_mul(
return res;
}
-struct fixed31_32 dal_fixed31_32_sqr(
- struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg)
{
struct fixed31_32 res;
- uint64_t arg_value = abs_i64(arg.value);
+ unsigned long long arg_value = abs_i64(arg.value);
- uint64_t arg_int = GET_INTEGER_PART(arg_value);
+ unsigned long long arg_int = GET_INTEGER_PART(arg_value);
- uint64_t arg_fra = GET_FRACTIONAL_PART(arg_value);
+ unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);
- uint64_t tmp;
+ unsigned long long tmp;
res.value = arg_int * arg_int;
@@ -246,28 +187,27 @@ struct fixed31_32 dal_fixed31_32_sqr(
tmp = arg_int * arg_fra;
- ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+ ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
res.value += tmp;
- ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+ ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
res.value += tmp;
tmp = arg_fra * arg_fra;
tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
- (tmp >= (uint64_t)dal_fixed31_32_half.value);
+ (tmp >= (unsigned long long)dc_fixpt_half.value);
- ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+ ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
res.value += tmp;
return res;
}
-struct fixed31_32 dal_fixed31_32_recip(
- struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg)
{
/*
* @note
@@ -276,41 +216,40 @@ struct fixed31_32 dal_fixed31_32_recip(
ASSERT(arg.value);
- return dal_fixed31_32_from_fraction(
- dal_fixed31_32_one.value,
+ return dc_fixpt_from_fraction(
+ dc_fixpt_one.value,
arg.value);
}
-struct fixed31_32 dal_fixed31_32_sinc(
- struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg)
{
struct fixed31_32 square;
- struct fixed31_32 res = dal_fixed31_32_one;
+ struct fixed31_32 res = dc_fixpt_one;
- int32_t n = 27;
+ int n = 27;
struct fixed31_32 arg_norm = arg;
- if (dal_fixed31_32_le(
- dal_fixed31_32_two_pi,
- dal_fixed31_32_abs(arg))) {
- arg_norm = dal_fixed31_32_sub(
+ if (dc_fixpt_le(
+ dc_fixpt_two_pi,
+ dc_fixpt_abs(arg))) {
+ arg_norm = dc_fixpt_sub(
arg_norm,
- dal_fixed31_32_mul_int(
- dal_fixed31_32_two_pi,
- (int32_t)div64_s64(
+ dc_fixpt_mul_int(
+ dc_fixpt_two_pi,
+ (int)div64_s64(
arg_norm.value,
- dal_fixed31_32_two_pi.value)));
+ dc_fixpt_two_pi.value)));
}
- square = dal_fixed31_32_sqr(arg_norm);
+ square = dc_fixpt_sqr(arg_norm);
do {
- res = dal_fixed31_32_sub(
- dal_fixed31_32_one,
- dal_fixed31_32_div_int(
- dal_fixed31_32_mul(
+ res = dc_fixpt_sub(
+ dc_fixpt_one,
+ dc_fixpt_div_int(
+ dc_fixpt_mul(
square,
res),
n * (n - 1)));
@@ -319,37 +258,35 @@ struct fixed31_32 dal_fixed31_32_sinc(
} while (n > 2);
if (arg.value != arg_norm.value)
- res = dal_fixed31_32_div(
- dal_fixed31_32_mul(res, arg_norm),
+ res = dc_fixpt_div(
+ dc_fixpt_mul(res, arg_norm),
arg);
return res;
}
-struct fixed31_32 dal_fixed31_32_sin(
- struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg)
{
- return dal_fixed31_32_mul(
+ return dc_fixpt_mul(
arg,
- dal_fixed31_32_sinc(arg));
+ dc_fixpt_sinc(arg));
}
-struct fixed31_32 dal_fixed31_32_cos(
- struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg)
{
/* TODO implement argument normalization */
- const struct fixed31_32 square = dal_fixed31_32_sqr(arg);
+ const struct fixed31_32 square = dc_fixpt_sqr(arg);
- struct fixed31_32 res = dal_fixed31_32_one;
+ struct fixed31_32 res = dc_fixpt_one;
- int32_t n = 26;
+ int n = 26;
do {
- res = dal_fixed31_32_sub(
- dal_fixed31_32_one,
- dal_fixed31_32_div_int(
- dal_fixed31_32_mul(
+ res = dc_fixpt_sub(
+ dc_fixpt_one,
+ dc_fixpt_div_int(
+ dc_fixpt_mul(
square,
res),
n * (n - 1)));
@@ -367,37 +304,35 @@ struct fixed31_32 dal_fixed31_32_cos(
*
* Calculated as Taylor series.
*/
-static struct fixed31_32 fixed31_32_exp_from_taylor_series(
- struct fixed31_32 arg)
+static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg)
{
- uint32_t n = 9;
+ unsigned int n = 9;
- struct fixed31_32 res = dal_fixed31_32_from_fraction(
+ struct fixed31_32 res = dc_fixpt_from_fraction(
n + 2,
n + 1);
/* TODO find correct res */
- ASSERT(dal_fixed31_32_lt(arg, dal_fixed31_32_one));
+ ASSERT(dc_fixpt_lt(arg, dc_fixpt_one));
do
- res = dal_fixed31_32_add(
- dal_fixed31_32_one,
- dal_fixed31_32_div_int(
- dal_fixed31_32_mul(
+ res = dc_fixpt_add(
+ dc_fixpt_one,
+ dc_fixpt_div_int(
+ dc_fixpt_mul(
arg,
res),
n));
while (--n != 1);
- return dal_fixed31_32_add(
- dal_fixed31_32_one,
- dal_fixed31_32_mul(
+ return dc_fixpt_add(
+ dc_fixpt_one,
+ dc_fixpt_mul(
arg,
res));
}
-struct fixed31_32 dal_fixed31_32_exp(
- struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg)
{
/*
* @brief
@@ -406,44 +341,43 @@ struct fixed31_32 dal_fixed31_32_exp(
* where m = round(x / ln(2)), r = x - m * ln(2)
*/
- if (dal_fixed31_32_le(
- dal_fixed31_32_ln2_div_2,
- dal_fixed31_32_abs(arg))) {
- int32_t m = dal_fixed31_32_round(
- dal_fixed31_32_div(
+ if (dc_fixpt_le(
+ dc_fixpt_ln2_div_2,
+ dc_fixpt_abs(arg))) {
+ int m = dc_fixpt_round(
+ dc_fixpt_div(
arg,
- dal_fixed31_32_ln2));
+ dc_fixpt_ln2));
- struct fixed31_32 r = dal_fixed31_32_sub(
+ struct fixed31_32 r = dc_fixpt_sub(
arg,
- dal_fixed31_32_mul_int(
- dal_fixed31_32_ln2,
+ dc_fixpt_mul_int(
+ dc_fixpt_ln2,
m));
ASSERT(m != 0);
- ASSERT(dal_fixed31_32_lt(
- dal_fixed31_32_abs(r),
- dal_fixed31_32_one));
+ ASSERT(dc_fixpt_lt(
+ dc_fixpt_abs(r),
+ dc_fixpt_one));
if (m > 0)
- return dal_fixed31_32_shl(
+ return dc_fixpt_shl(
fixed31_32_exp_from_taylor_series(r),
- (uint8_t)m);
+ (unsigned char)m);
else
- return dal_fixed31_32_div_int(
+ return dc_fixpt_div_int(
fixed31_32_exp_from_taylor_series(r),
1LL << -m);
} else if (arg.value != 0)
return fixed31_32_exp_from_taylor_series(arg);
else
- return dal_fixed31_32_one;
+ return dc_fixpt_one;
}
-struct fixed31_32 dal_fixed31_32_log(
- struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg)
{
- struct fixed31_32 res = dal_fixed31_32_neg(dal_fixed31_32_one);
+ struct fixed31_32 res = dc_fixpt_neg(dc_fixpt_one);
/* TODO improve 1st estimation */
struct fixed31_32 error;
@@ -453,15 +387,15 @@ struct fixed31_32 dal_fixed31_32_log(
/* TODO if arg is zero, return -INF */
do {
- struct fixed31_32 res1 = dal_fixed31_32_add(
- dal_fixed31_32_sub(
+ struct fixed31_32 res1 = dc_fixpt_add(
+ dc_fixpt_sub(
res,
- dal_fixed31_32_one),
- dal_fixed31_32_div(
+ dc_fixpt_one),
+ dc_fixpt_div(
arg,
- dal_fixed31_32_exp(res)));
+ dc_fixpt_exp(res)));
- error = dal_fixed31_32_sub(
+ error = dc_fixpt_sub(
res,
res1);
@@ -472,78 +406,23 @@ struct fixed31_32 dal_fixed31_32_log(
return res;
}
-struct fixed31_32 dal_fixed31_32_pow(
- struct fixed31_32 arg1,
- struct fixed31_32 arg2)
-{
- return dal_fixed31_32_exp(
- dal_fixed31_32_mul(
- dal_fixed31_32_log(arg1),
- arg2));
-}
-
-int32_t dal_fixed31_32_floor(
- struct fixed31_32 arg)
-{
- uint64_t arg_value = abs_i64(arg.value);
-
- if (arg.value >= 0)
- return (int32_t)GET_INTEGER_PART(arg_value);
- else
- return -(int32_t)GET_INTEGER_PART(arg_value);
-}
-
-int32_t dal_fixed31_32_round(
- struct fixed31_32 arg)
-{
- uint64_t arg_value = abs_i64(arg.value);
-
- const int64_t summand = dal_fixed31_32_half.value;
-
- ASSERT(LLONG_MAX - (int64_t)arg_value >= summand);
-
- arg_value += summand;
-
- if (arg.value >= 0)
- return (int32_t)GET_INTEGER_PART(arg_value);
- else
- return -(int32_t)GET_INTEGER_PART(arg_value);
-}
-
-int32_t dal_fixed31_32_ceil(
- struct fixed31_32 arg)
-{
- uint64_t arg_value = abs_i64(arg.value);
-
- const int64_t summand = dal_fixed31_32_one.value -
- dal_fixed31_32_epsilon.value;
-
- ASSERT(LLONG_MAX - (int64_t)arg_value >= summand);
-
- arg_value += summand;
-
- if (arg.value >= 0)
- return (int32_t)GET_INTEGER_PART(arg_value);
- else
- return -(int32_t)GET_INTEGER_PART(arg_value);
-}
/* this function is a generic helper to translate fixed point value to
* specified integer format that will consist of integer_bits integer part and
* fractional_bits fractional part. For example it is used in
- * dal_fixed31_32_u2d19 to receive 2 bits integer part and 19 bits fractional
+ * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
* part in 32 bits. It is used in hw programming (scaler)
*/
-static inline uint32_t ux_dy(
- int64_t value,
- uint32_t integer_bits,
- uint32_t fractional_bits)
+static inline unsigned int ux_dy(
+ long long value,
+ unsigned int integer_bits,
+ unsigned int fractional_bits)
{
/* 1. create mask of integer part */
- uint32_t result = (1 << integer_bits) - 1;
+ unsigned int result = (1 << integer_bits) - 1;
/* 2. mask out fractional part */
- uint32_t fractional_part = FRACTIONAL_PART_MASK & value;
+ unsigned int fractional_part = FRACTIONAL_PART_MASK & value;
/* 3. shrink fixed point integer part to be of integer_bits width*/
result &= GET_INTEGER_PART(value);
/* 4. make space for fractional part to be filled in after integer */
@@ -554,13 +433,13 @@ static inline uint32_t ux_dy(
return result | fractional_part;
}
-static inline uint32_t clamp_ux_dy(
- int64_t value,
- uint32_t integer_bits,
- uint32_t fractional_bits,
- uint32_t min_clamp)
+static inline unsigned int clamp_ux_dy(
+ long long value,
+ unsigned int integer_bits,
+ unsigned int fractional_bits,
+ unsigned int min_clamp)
{
- uint32_t truncated_val = ux_dy(value, integer_bits, fractional_bits);
+ unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits);
if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
return (1 << (integer_bits + fractional_bits)) - 1;
@@ -570,35 +449,30 @@ static inline uint32_t clamp_ux_dy(
return min_clamp;
}
-uint32_t dal_fixed31_32_u2d19(
- struct fixed31_32 arg)
+unsigned int dc_fixpt_u2d19(struct fixed31_32 arg)
{
return ux_dy(arg.value, 2, 19);
}
-uint32_t dal_fixed31_32_u0d19(
- struct fixed31_32 arg)
+unsigned int dc_fixpt_u0d19(struct fixed31_32 arg)
{
return ux_dy(arg.value, 0, 19);
}
-uint32_t dal_fixed31_32_clamp_u0d14(
- struct fixed31_32 arg)
+unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg)
{
return clamp_ux_dy(arg.value, 0, 14, 1);
}
-uint32_t dal_fixed31_32_clamp_u0d10(
- struct fixed31_32 arg)
+unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg)
{
return clamp_ux_dy(arg.value, 0, 10, 1);
}
-int32_t dal_fixed31_32_s4d19(
- struct fixed31_32 arg)
+int dc_fixpt_s4d19(struct fixed31_32 arg)
{
if (arg.value < 0)
- return -(int32_t)ux_dy(dal_fixed31_32_abs(arg).value, 4, 19);
+ return -(int)ux_dy(dc_fixpt_abs(arg).value, 4, 19);
else
return ux_dy(arg.value, 4, 19);
}
diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt32_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt32_32.c
deleted file mode 100644
index 4d3aaa82a07b..000000000000
--- a/drivers/gpu/drm/amd/display/dc/basics/fixpt32_32.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright 2012-15 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dm_services.h"
-#include "include/fixed32_32.h"
-
-static uint64_t u64_div(uint64_t n, uint64_t d)
-{
- uint32_t i = 0;
- uint64_t r;
- uint64_t q = div64_u64_rem(n, d, &r);
-
- for (i = 0; i < 32; ++i) {
- uint64_t sbit = q & (1ULL<<63);
-
- r <<= 1;
- r |= sbit ? 1 : 0;
- q <<= 1;
- if (r >= d) {
- r -= d;
- q |= 1;
- }
- }
-
- if (2*r >= d)
- q += 1;
- return q;
-}
-
-struct fixed32_32 dal_fixed32_32_from_fraction(uint32_t n, uint32_t d)
-{
- struct fixed32_32 fx;
-
- fx.value = u64_div((uint64_t)n << 32, (uint64_t)d << 32);
- return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_add(
- struct fixed32_32 lhs,
- struct fixed32_32 rhs)
-{
- struct fixed32_32 fx = {lhs.value + rhs.value};
-
- ASSERT(fx.value >= rhs.value);
- return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_add_int(struct fixed32_32 lhs, uint32_t rhs)
-{
- struct fixed32_32 fx = {lhs.value + ((uint64_t)rhs << 32)};
-
- ASSERT(fx.value >= (uint64_t)rhs << 32);
- return fx;
-
-}
-struct fixed32_32 dal_fixed32_32_sub(
- struct fixed32_32 lhs,
- struct fixed32_32 rhs)
-{
- struct fixed32_32 fx;
-
- ASSERT(lhs.value >= rhs.value);
- fx.value = lhs.value - rhs.value;
- return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_sub_int(struct fixed32_32 lhs, uint32_t rhs)
-{
- struct fixed32_32 fx;
-
- ASSERT(lhs.value >= ((uint64_t)rhs<<32));
- fx.value = lhs.value - ((uint64_t)rhs<<32);
- return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_mul(
- struct fixed32_32 lhs,
- struct fixed32_32 rhs)
-{
- struct fixed32_32 fx;
- uint64_t lhs_int = lhs.value>>32;
- uint64_t lhs_frac = (uint32_t)lhs.value;
- uint64_t rhs_int = rhs.value>>32;
- uint64_t rhs_frac = (uint32_t)rhs.value;
- uint64_t ahbh = lhs_int * rhs_int;
- uint64_t ahbl = lhs_int * rhs_frac;
- uint64_t albh = lhs_frac * rhs_int;
- uint64_t albl = lhs_frac * rhs_frac;
-
- ASSERT((ahbh>>32) == 0);
-
- fx.value = (ahbh<<32) + ahbl + albh + (albl>>32);
- return fx;
-
-}
-
-struct fixed32_32 dal_fixed32_32_mul_int(struct fixed32_32 lhs, uint32_t rhs)
-{
- struct fixed32_32 fx;
- uint64_t lhsi = (lhs.value>>32) * (uint64_t)rhs;
- uint64_t lhsf;
-
- ASSERT((lhsi>>32) == 0);
- lhsf = ((uint32_t)lhs.value) * (uint64_t)rhs;
- ASSERT((lhsi<<32) + lhsf >= lhsf);
- fx.value = (lhsi<<32) + lhsf;
- return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_div(
- struct fixed32_32 lhs,
- struct fixed32_32 rhs)
-{
- struct fixed32_32 fx;
-
- fx.value = u64_div(lhs.value, rhs.value);
- return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_div_int(struct fixed32_32 lhs, uint32_t rhs)
-{
- struct fixed32_32 fx;
-
- fx.value = u64_div(lhs.value, (uint64_t)rhs << 32);
- return fx;
-}
-
-uint32_t dal_fixed32_32_ceil(struct fixed32_32 v)
-{
- ASSERT((uint32_t)v.value ? (v.value >> 32) + 1 >= 1 : true);
- return (v.value>>32) + ((uint32_t)v.value ? 1 : 0);
-}
-
-uint32_t dal_fixed32_32_round(struct fixed32_32 v)
-{
- ASSERT(v.value + (1ULL<<31) >= (1ULL<<31));
- return (v.value + (1ULL<<31))>>32;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c
index 854678a0c54b..021451549ff7 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c
@@ -94,7 +94,6 @@ void dc_conn_log(struct dc_context *ctx,
dm_logger_append(&entry, "%2.2X ", hex_data[i]);
dm_logger_append(&entry, "^\n");
- dm_helpers_dc_conn_log(ctx, &entry, event);
fail:
dm_logger_close(&entry);
diff --git a/drivers/gpu/drm/amd/display/dc/basics/logger.c b/drivers/gpu/drm/amd/display/dc/basics/logger.c
index 31bee054f43a..738a818d58d1 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/logger.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/logger.c
@@ -61,7 +61,7 @@ static const struct dc_log_type_info log_type_info_tbl[] = {
{LOG_EVENT_UNDERFLOW, "Underflow"},
{LOG_IF_TRACE, "InterfaceTrace"},
{LOG_DTN, "DTN"},
- {LOG_PROFILING, "Profiling"}
+ {LOG_DISPLAYSTATS, "DisplayStats"}
};
@@ -402,3 +402,4 @@ cleanup:
entry->max_buf_bytes = 0;
}
}
+
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 10a5807a7e8b..b8cef7af3c4a 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1330,6 +1330,9 @@ static enum bp_result bios_parser_get_firmware_info(
case 2:
result = get_firmware_info_v3_2(bp, info);
break;
+ case 3:
+ result = get_firmware_info_v3_2(bp, info);
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
index 4b5fdd577848..651e1fd4622f 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
@@ -24,7 +24,7 @@
*/
#include "dm_services.h"
-
+#include "amdgpu.h"
#include "atom.h"
#include "include/bios_parser_interface.h"
@@ -35,16 +35,16 @@
#include "bios_parser_types_internal.h"
#define EXEC_BIOS_CMD_TABLE(command, params)\
- (cgs_atom_exec_cmd_table(bp->base.ctx->cgs_device, \
+ (amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
GetIndexIntoMasterTable(COMMAND, command), \
- &params) == 0)
+ (uint32_t *)&params) == 0)
#define BIOS_CMD_TABLE_REVISION(command, frev, crev)\
- cgs_atom_get_cmd_table_revs(bp->base.ctx->cgs_device, \
+ amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
GetIndexIntoMasterTable(COMMAND, command), &frev, &crev)
#define BIOS_CMD_TABLE_PARA_REVISION(command)\
- bios_cmd_table_para_revision(bp->base.ctx->cgs_device, \
+ bios_cmd_table_para_revision(bp->base.ctx->driver_context, \
GetIndexIntoMasterTable(COMMAND, command))
static void init_dig_encoder_control(struct bios_parser *bp);
@@ -82,16 +82,18 @@ void dal_bios_parser_init_cmd_tbl(struct bios_parser *bp)
init_set_dce_clock(bp);
}
-static uint32_t bios_cmd_table_para_revision(void *cgs_device,
+static uint32_t bios_cmd_table_para_revision(void *dev,
uint32_t index)
{
+ struct amdgpu_device *adev = dev;
uint8_t frev, crev;
- if (cgs_atom_get_cmd_table_revs(cgs_device,
+ if (amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context,
index,
- &frev, &crev) != 0)
+ &frev, &crev))
+ return crev;
+ else
return 0;
- return crev;
}
/*******************************************************************************
@@ -368,7 +370,7 @@ static void init_transmitter_control(struct bios_parser *bp)
uint8_t crev;
if (BIOS_CMD_TABLE_REVISION(UNIPHYTransmitterControl,
- frev, crev) != 0)
+ frev, crev) == false)
BREAK_TO_DEBUGGER();
switch (crev) {
case 2:
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index 3f63f712c8a4..752b08a42d3e 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -26,14 +26,18 @@
#include "dm_services.h"
#include "ObjectID.h"
-#include "atomfirmware.h"
+#include "atomfirmware.h"
+#include "atom.h"
#include "include/bios_parser_interface.h"
#include "command_table2.h"
#include "command_table_helper2.h"
#include "bios_parser_helper.h"
#include "bios_parser_types_internal2.h"
+#include "amdgpu.h"
+
+
#define DC_LOGGER \
bp->base.ctx->logger
@@ -43,16 +47,16 @@
->FieldName)-(char *)0)/sizeof(uint16_t))
#define EXEC_BIOS_CMD_TABLE(fname, params)\
- (cgs_atom_exec_cmd_table(bp->base.ctx->cgs_device, \
+ (amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
GET_INDEX_INTO_MASTER_TABLE(command, fname), \
- &params) == 0)
+ (uint32_t *)&params) == 0)
#define BIOS_CMD_TABLE_REVISION(fname, frev, crev)\
- cgs_atom_get_cmd_table_revs(bp->base.ctx->cgs_device, \
+ amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
GET_INDEX_INTO_MASTER_TABLE(command, fname), &frev, &crev)
#define BIOS_CMD_TABLE_PARA_REVISION(fname)\
- bios_cmd_table_para_revision(bp->base.ctx->cgs_device, \
+ bios_cmd_table_para_revision(bp->base.ctx->driver_context, \
GET_INDEX_INTO_MASTER_TABLE(command, fname))
static void init_dig_encoder_control(struct bios_parser *bp);
@@ -86,16 +90,18 @@ void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp)
init_get_smu_clock_info(bp);
}
-static uint32_t bios_cmd_table_para_revision(void *cgs_device,
+static uint32_t bios_cmd_table_para_revision(void *dev,
uint32_t index)
{
+ struct amdgpu_device *adev = dev;
uint8_t frev, crev;
- if (cgs_atom_get_cmd_table_revs(cgs_device,
+ if (amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context,
index,
- &frev, &crev) != 0)
+ &frev, &crev))
+ return crev;
+ else
return 0;
- return crev;
}
/******************************************************************************
@@ -201,7 +207,7 @@ static void init_transmitter_control(struct bios_parser *bp)
uint8_t frev;
uint8_t crev;
- if (BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) != 0)
+ if (BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) == false)
BREAK_TO_DEBUGGER();
switch (crev) {
case 6:
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
index 2979358c6a55..253bbb1eea60 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
@@ -51,6 +51,7 @@ bool dal_bios_parser_init_cmd_tbl_helper(
return true;
case DCE_VERSION_11_2:
+ case DCE_VERSION_11_22:
*h = dal_cmd_tbl_helper_dce112_get_table();
return true;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
index 9a4d30dd4969..bbbcef566c55 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
@@ -52,6 +52,7 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
return true;
case DCE_VERSION_11_2:
+ case DCE_VERSION_11_22:
*h = dal_cmd_tbl_helper_dce112_get_table2();
return true;
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h b/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h
new file mode 100644
index 000000000000..fc3f98fb09ea
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h
@@ -0,0 +1,579 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _CALCS_CALCS_LOGGER_H_
+#define _CALCS_CALCS_LOGGER_H_
+#define DC_LOGGER \
+ logger
+
+static void print_bw_calcs_dceip(struct dal_logger *logger, const struct bw_calcs_dceip *dceip)
+{
+
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_dceip");
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_calcs_version version %d", dceip->version);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] large_cursor: %d", dceip->large_cursor);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] dmif_pipe_en_fbc_chunk_tracker: %d", dceip->dmif_pipe_en_fbc_chunk_tracker);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] display_write_back_supported: %d", dceip->display_write_back_supported);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] argb_compression_support: %d", dceip->argb_compression_support);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] pre_downscaler_enabled: %d", dceip->pre_downscaler_enabled);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] underlay_downscale_prefetch_enabled: %d",
+ dceip->underlay_downscale_prefetch_enabled);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] graphics_lb_nodownscaling_multi_line_prefetching: %d",
+ dceip->graphics_lb_nodownscaling_multi_line_prefetching);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] limit_excessive_outstanding_dmif_requests: %d",
+ dceip->limit_excessive_outstanding_dmif_requests);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] cursor_max_outstanding_group_num: %d",
+ dceip->cursor_max_outstanding_group_num);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] lines_interleaved_into_lb: %d", dceip->lines_interleaved_into_lb);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] low_power_tiling_mode: %d", dceip->low_power_tiling_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] chunk_width: %d", dceip->chunk_width);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_graphics_pipes: %d", dceip->number_of_graphics_pipes);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_underlay_pipes: %d", dceip->number_of_underlay_pipes);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] max_dmif_buffer_allocated: %d", dceip->max_dmif_buffer_allocated);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] graphics_dmif_size: %d", dceip->graphics_dmif_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_luma_dmif_size: %d", dceip->underlay_luma_dmif_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_chroma_dmif_size: %d", dceip->underlay_chroma_dmif_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] scatter_gather_lines_of_pte_prefetching_in_linear_mode: %d",
+ dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] display_write_back420_luma_mcifwr_buffer_size: %d",
+ dceip->display_write_back420_luma_mcifwr_buffer_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] display_write_back420_chroma_mcifwr_buffer_size: %d",
+ dceip->display_write_back420_chroma_mcifwr_buffer_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] scatter_gather_pte_request_rows_in_tiling_mode: %d",
+ dceip->scatter_gather_pte_request_rows_in_tiling_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_vscaler_efficiency10_bit_per_component: %d",
+ bw_fixed_to_int(dceip->underlay_vscaler_efficiency10_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_vscaler_efficiency12_bit_per_component: %d",
+ bw_fixed_to_int(dceip->underlay_vscaler_efficiency12_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency6_bit_per_component: %d",
+ bw_fixed_to_int(dceip->graphics_vscaler_efficiency6_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency8_bit_per_component: %d",
+ bw_fixed_to_int(dceip->graphics_vscaler_efficiency8_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency10_bit_per_component: %d",
+ bw_fixed_to_int(dceip->graphics_vscaler_efficiency10_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency12_bit_per_component: %d",
+ bw_fixed_to_int(dceip->graphics_vscaler_efficiency12_bit_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] alpha_vscaler_efficiency: %d",
+ bw_fixed_to_int(dceip->alpha_vscaler_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_write_pixels_per_dispclk: %d",
+ bw_fixed_to_int(dceip->lb_write_pixels_per_dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_size_per_component444: %d",
+ bw_fixed_to_int(dceip->lb_size_per_component444));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_and_dram_clock_state_change_gated_before_cursor: %d",
+ bw_fixed_to_int(dceip->stutter_and_dram_clock_state_change_gated_before_cursor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay420_luma_lb_size_per_component: %d",
+ bw_fixed_to_int(dceip->underlay420_luma_lb_size_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay420_chroma_lb_size_per_component: %d",
+ bw_fixed_to_int(dceip->underlay420_chroma_lb_size_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay422_lb_size_per_component: %d",
+ bw_fixed_to_int(dceip->underlay422_lb_size_per_component));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_chunk_width: %d", bw_fixed_to_int(dceip->cursor_chunk_width));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_dcp_buffer_lines: %d",
+ bw_fixed_to_int(dceip->cursor_dcp_buffer_lines));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_width_efficient_for_tiling: %d",
+ bw_fixed_to_int(dceip->underlay_maximum_width_efficient_for_tiling));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_height_efficient_for_tiling: %d",
+ bw_fixed_to_int(dceip->underlay_maximum_height_efficient_for_tiling));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display: %d",
+ bw_fixed_to_int(dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation: %d",
+ bw_fixed_to_int(dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_outstanding_pte_request_limit: %d",
+ bw_fixed_to_int(dceip->minimum_outstanding_pte_request_limit));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_total_outstanding_pte_requests_allowed_by_saw: %d",
+ bw_fixed_to_int(dceip->maximum_total_outstanding_pte_requests_allowed_by_saw));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] linear_mode_line_request_alternation_slice: %d",
+ bw_fixed_to_int(dceip->linear_mode_line_request_alternation_slice));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] request_efficiency: %d", bw_fixed_to_int(dceip->request_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_per_request: %d", bw_fixed_to_int(dceip->dispclk_per_request));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_ramping_factor: %d",
+ bw_fixed_to_int(dceip->dispclk_ramping_factor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_pipe_throughput_factor: %d",
+ bw_fixed_to_int(dceip->display_pipe_throughput_factor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_all_surfaces_burst_time: %d",
+ bw_fixed_to_int(dceip->mcifwr_all_surfaces_burst_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_request_buffer_size: %d",
+ bw_fixed_to_int(dceip->dmif_request_buffer_size));
+
+
+}
+
+static void print_bw_calcs_vbios(struct dal_logger *logger, const struct bw_calcs_vbios *vbios)
+{
+
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_vbios vbios");
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines memory_type: %d", vbios->memory_type);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines memory_type: %d", vbios->memory_type);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] dram_channel_width_in_bits: %d", vbios->dram_channel_width_in_bits);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_channels: %d", vbios->number_of_dram_channels);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_banks: %d", vbios->number_of_dram_banks);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_yclk: %d", bw_fixed_to_int(vbios->low_yclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_yclk: %d", bw_fixed_to_int(vbios->mid_yclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_yclk: %d", bw_fixed_to_int(vbios->high_yclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_sclk: %d", bw_fixed_to_int(vbios->low_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid1_sclk: %d", bw_fixed_to_int(vbios->mid1_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid2_sclk: %d", bw_fixed_to_int(vbios->mid2_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid3_sclk: %d", bw_fixed_to_int(vbios->mid3_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid4_sclk: %d", bw_fixed_to_int(vbios->mid4_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid5_sclk: %d", bw_fixed_to_int(vbios->mid5_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid6_sclk: %d", bw_fixed_to_int(vbios->mid6_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_sclk: %d", bw_fixed_to_int(vbios->high_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_voltage_max_dispclk: %d",
+ bw_fixed_to_int(vbios->low_voltage_max_dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_voltage_max_dispclk;: %d",
+ bw_fixed_to_int(vbios->mid_voltage_max_dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_voltage_max_dispclk;: %d",
+ bw_fixed_to_int(vbios->high_voltage_max_dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_voltage_max_phyclk: %d",
+ bw_fixed_to_int(vbios->low_voltage_max_phyclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_voltage_max_phyclk: %d",
+ bw_fixed_to_int(vbios->mid_voltage_max_phyclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_voltage_max_phyclk: %d",
+ bw_fixed_to_int(vbios->high_voltage_max_phyclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] data_return_bus_width: %d", bw_fixed_to_int(vbios->data_return_bus_width));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] trc: %d", bw_fixed_to_int(vbios->trc));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifmc_urgent_latency: %d", bw_fixed_to_int(vbios->dmifmc_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_self_refresh_exit_latency: %d",
+ bw_fixed_to_int(vbios->stutter_self_refresh_exit_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_self_refresh_entry_latency: %d",
+ bw_fixed_to_int(vbios->stutter_self_refresh_entry_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_change_latency: %d",
+ bw_fixed_to_int(vbios->nbp_state_change_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwrmc_urgent_latency: %d",
+ bw_fixed_to_int(vbios->mcifwrmc_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bool] scatter_gather_enable: %d", vbios->scatter_gather_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] down_spread_percentage: %d",
+ bw_fixed_to_int(vbios->down_spread_percentage));
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] cursor_width: %d", vbios->cursor_width);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] average_compression_rate: %d", vbios->average_compression_rate);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_request_slots_gmc_reserves_for_dmif_per_channel: %d",
+ vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_duration: %d", bw_fixed_to_int(vbios->blackout_duration));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_blackout_recovery_time: %d",
+ bw_fixed_to_int(vbios->maximum_blackout_recovery_time));
+
+
+}
+
+static void print_bw_calcs_data(struct dal_logger *logger, struct bw_calcs_data *data)
+{
+
+ int i, j, k;
+
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_data data");
+ DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_displays: %d", data->number_of_displays);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_surface_type: %d", data->underlay_surface_type);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines panning_and_bezel_adjustment: %d",
+ data->panning_and_bezel_adjustment);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines graphics_tiling_mode: %d", data->graphics_tiling_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] graphics_lb_bpc: %d", data->graphics_lb_bpc);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_lb_bpc: %d", data->underlay_lb_bpc);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_tiling_mode: %d", data->underlay_tiling_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines d0_underlay_mode: %d", data->d0_underlay_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] d1_display_write_back_dwb_enable: %d", data->d1_display_write_back_dwb_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines d1_underlay_mode: %d", data->d1_underlay_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] cpup_state_change_enable: %d", data->cpup_state_change_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] cpuc_state_change_enable: %d", data->cpuc_state_change_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] nbp_state_change_enable: %d", data->nbp_state_change_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] stutter_mode_enable: %d", data->stutter_mode_enable);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] y_clk_level: %d", data->y_clk_level);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] sclk_level: %d", data->sclk_level);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_underlay_surfaces: %d", data->number_of_underlay_surfaces);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_wrchannels: %d", data->number_of_dram_wrchannels);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] chunk_request_delay: %d", data->chunk_request_delay);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_channels: %d", data->number_of_dram_channels);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_micro_tile_mode: %d", data->underlay_micro_tile_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines graphics_micro_tile_mode: %d", data->graphics_micro_tile_mode);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] max_phyclk: %d", bw_fixed_to_int(data->max_phyclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_efficiency: %d", bw_fixed_to_int(data->dram_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width_after_surface_type: %d",
+ bw_fixed_to_int(data->src_width_after_surface_type));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height_after_surface_type: %d",
+ bw_fixed_to_int(data->src_height_after_surface_type));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_surface_type: %d",
+ bw_fixed_to_int(data->hsr_after_surface_type));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_surface_type: %d", bw_fixed_to_int(data->vsr_after_surface_type));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width_after_rotation: %d",
+ bw_fixed_to_int(data->src_width_after_rotation));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height_after_rotation: %d",
+ bw_fixed_to_int(data->src_height_after_rotation));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_rotation: %d", bw_fixed_to_int(data->hsr_after_rotation));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_rotation: %d", bw_fixed_to_int(data->vsr_after_rotation));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_height_pixels: %d", bw_fixed_to_int(data->source_height_pixels));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_stereo: %d", bw_fixed_to_int(data->hsr_after_stereo));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_stereo: %d", bw_fixed_to_int(data->vsr_after_stereo));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_in_lb: %d", bw_fixed_to_int(data->source_width_in_lb));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_line_pitch: %d", bw_fixed_to_int(data->lb_line_pitch));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_source_efficient_for_tiling: %d",
+ bw_fixed_to_int(data->underlay_maximum_source_efficient_for_tiling));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] num_lines_at_frame_start: %d",
+ bw_fixed_to_int(data->num_lines_at_frame_start));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_dmif_size_in_time: %d", bw_fixed_to_int(data->min_dmif_size_in_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_mcifwr_size_in_time: %d",
+ bw_fixed_to_int(data->min_mcifwr_size_in_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_requests_for_dmif_size: %d",
+ bw_fixed_to_int(data->total_requests_for_dmif_size));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting: %d",
+ bw_fixed_to_int(data->peak_pte_request_to_eviction_ratio_limiting));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] useful_pte_per_pte_request: %d",
+ bw_fixed_to_int(data->useful_pte_per_pte_request));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_request_rows: %d",
+ bw_fixed_to_int(data->scatter_gather_pte_request_rows));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_row_height: %d",
+ bw_fixed_to_int(data->scatter_gather_row_height));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_requests_in_vblank: %d",
+ bw_fixed_to_int(data->scatter_gather_pte_requests_in_vblank));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] inefficient_linear_pitch_in_bytes: %d",
+ bw_fixed_to_int(data->inefficient_linear_pitch_in_bytes));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_total_data: %d", bw_fixed_to_int(data->cursor_total_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_total_request_groups: %d",
+ bw_fixed_to_int(data->cursor_total_request_groups));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_total_pte_requests: %d",
+ bw_fixed_to_int(data->scatter_gather_total_pte_requests));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_total_pte_request_groups: %d",
+ bw_fixed_to_int(data->scatter_gather_total_pte_request_groups));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] tile_width_in_pixels: %d", bw_fixed_to_int(data->tile_width_in_pixels));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_total_number_of_data_request_page_close_open: %d",
+ bw_fixed_to_int(data->dmif_total_number_of_data_request_page_close_open));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_total_number_of_data_request_page_close_open: %d",
+ bw_fixed_to_int(data->mcifwr_total_number_of_data_request_page_close_open));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] bytes_per_page_close_open: %d",
+ bw_fixed_to_int(data->bytes_per_page_close_open));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_total_page_close_open_time: %d",
+ bw_fixed_to_int(data->mcifwr_total_page_close_open_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_requests_for_adjusted_dmif_size: %d",
+ bw_fixed_to_int(data->total_requests_for_adjusted_dmif_size));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dmifmc_urgent_trips: %d",
+ bw_fixed_to_int(data->total_dmifmc_urgent_trips));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dmifmc_urgent_latency: %d",
+ bw_fixed_to_int(data->total_dmifmc_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_reads_required_data: %d",
+ bw_fixed_to_int(data->total_display_reads_required_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_reads_required_dram_access_data: %d",
+ bw_fixed_to_int(data->total_display_reads_required_dram_access_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_writes_required_data: %d",
+ bw_fixed_to_int(data->total_display_writes_required_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_writes_required_dram_access_data: %d",
+ bw_fixed_to_int(data->total_display_writes_required_dram_access_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_required_data: %d",
+ bw_fixed_to_int(data->display_reads_required_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_required_dram_access_data: %d",
+ bw_fixed_to_int(data->display_reads_required_dram_access_data));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_total_page_close_open_time: %d",
+ bw_fixed_to_int(data->dmif_total_page_close_open_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_cursor_memory_interface_buffer_size_in_time: %d",
+ bw_fixed_to_int(data->min_cursor_memory_interface_buffer_size_in_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_read_buffer_size_in_time: %d",
+ bw_fixed_to_int(data->min_read_buffer_size_in_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_time_for_data_transfer: %d",
+ bw_fixed_to_int(data->display_reads_time_for_data_transfer));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_writes_time_for_data_transfer: %d",
+ bw_fixed_to_int(data->display_writes_time_for_data_transfer));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_dram_bandwidth: %d",
+ bw_fixed_to_int(data->dmif_required_dram_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_required_dram_bandwidth: %d",
+ bw_fixed_to_int(data->mcifwr_required_dram_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_dmifmc_urgent_latency_for_page_close_open: %d",
+ bw_fixed_to_int(data->required_dmifmc_urgent_latency_for_page_close_open));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_mcifmcwr_urgent_latency: %d",
+ bw_fixed_to_int(data->required_mcifmcwr_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_dram_bandwidth_gbyte_per_second: %d",
+ bw_fixed_to_int(data->required_dram_bandwidth_gbyte_per_second));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_bandwidth: %d", bw_fixed_to_int(data->dram_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_sclk: %d", bw_fixed_to_int(data->dmif_required_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_required_sclk: %d", bw_fixed_to_int(data->mcifwr_required_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_sclk: %d", bw_fixed_to_int(data->required_sclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] downspread_factor: %d", bw_fixed_to_int(data->downspread_factor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_scaler_efficiency: %d", bw_fixed_to_int(data->v_scaler_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scaler_limits_factor: %d", bw_fixed_to_int(data->scaler_limits_factor));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_pipe_pixel_throughput: %d",
+ bw_fixed_to_int(data->display_pipe_pixel_throughput));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_with_ramping: %d",
+ bw_fixed_to_int(data->total_dispclk_required_with_ramping));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_without_ramping: %d",
+ bw_fixed_to_int(data->total_dispclk_required_without_ramping));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_read_request_bandwidth: %d",
+ bw_fixed_to_int(data->total_read_request_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_write_request_bandwidth: %d",
+ bw_fixed_to_int(data->total_write_request_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_total_read_request_bandwidth: %d",
+ bw_fixed_to_int(data->dispclk_required_for_total_read_request_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_with_ramping_with_request_bandwidth: %d",
+ bw_fixed_to_int(data->total_dispclk_required_with_ramping_with_request_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_without_ramping_with_request_bandwidth: %d",
+ bw_fixed_to_int(data->total_dispclk_required_without_ramping_with_request_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk: %d", bw_fixed_to_int(data->dispclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_recovery_time: %d", bw_fixed_to_int(data->blackout_recovery_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_pixels_per_data_fifo_entry: %d",
+ bw_fixed_to_int(data->min_pixels_per_data_fifo_entry));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] sclk_deep_sleep: %d", bw_fixed_to_int(data->sclk_deep_sleep));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] chunk_request_time: %d", bw_fixed_to_int(data->chunk_request_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_request_time: %d", bw_fixed_to_int(data->cursor_request_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] line_source_pixels_transfer_time: %d",
+ bw_fixed_to_int(data->line_source_pixels_transfer_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifdram_access_efficiency: %d",
+ bw_fixed_to_int(data->dmifdram_access_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwrdram_access_efficiency: %d",
+ bw_fixed_to_int(data->mcifwrdram_access_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_average_bandwidth_no_compression: %d",
+ bw_fixed_to_int(data->total_average_bandwidth_no_compression));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_average_bandwidth: %d",
+ bw_fixed_to_int(data->total_average_bandwidth));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_stutter_cycle_duration: %d",
+ bw_fixed_to_int(data->total_stutter_cycle_duration));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_burst_time: %d", bw_fixed_to_int(data->stutter_burst_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] time_in_self_refresh: %d", bw_fixed_to_int(data->time_in_self_refresh));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_efficiency: %d", bw_fixed_to_int(data->stutter_efficiency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] worst_number_of_trips_to_memory: %d",
+ bw_fixed_to_int(data->worst_number_of_trips_to_memory));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] immediate_flip_time: %d", bw_fixed_to_int(data->immediate_flip_time));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_for_non_dmif_clients: %d",
+ bw_fixed_to_int(data->latency_for_non_dmif_clients));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_for_non_mcifwr_clients: %d",
+ bw_fixed_to_int(data->latency_for_non_mcifwr_clients));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifmc_urgent_latency_supported_in_high_sclk_and_yclk: %d",
+ bw_fixed_to_int(data->dmifmc_urgent_latency_supported_in_high_sclk_and_yclk));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_dram_speed_change_margin: %d",
+ bw_fixed_to_int(data->nbp_state_dram_speed_change_margin));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_time_for_data_transfer_and_urgent_latency: %d",
+ bw_fixed_to_int(data->display_reads_time_for_data_transfer_and_urgent_latency));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_speed_change_margin: %d",
+ bw_fixed_to_int(data->dram_speed_change_margin));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_vblank_dram_speed_change_margin: %d",
+ bw_fixed_to_int(data->min_vblank_dram_speed_change_margin));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_stutter_refresh_duration: %d",
+ bw_fixed_to_int(data->min_stutter_refresh_duration));
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] total_stutter_dmif_buffer_size: %d", data->total_stutter_dmif_buffer_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] total_bytes_requested: %d", data->total_bytes_requested);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] min_stutter_dmif_buffer_size: %d", data->min_stutter_dmif_buffer_size);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] num_stutter_bursts: %d", data->num_stutter_bursts);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_blank_nbp_state_dram_speed_change_latency_supported: %d",
+ bw_fixed_to_int(data->v_blank_nbp_state_dram_speed_change_latency_supported));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_dram_speed_change_latency_supported: %d",
+ bw_fixed_to_int(data->nbp_state_dram_speed_change_latency_supported));
+
+ for (i = 0; i < maximum_number_of_surfaces; i++) {
+ DC_LOG_BANDWIDTH_CALCS(" [bool] fbc_en[%d]:%d\n", i, data->fbc_en[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] lpt_en[%d]:%d", i, data->lpt_en[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] displays_match_flag[%d]:%d", i, data->displays_match_flag[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] use_alpha[%d]:%d", i, data->use_alpha[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] orthogonal_rotation[%d]:%d", i, data->orthogonal_rotation[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] enable[%d]:%d", i, data->enable[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] access_one_channel_only[%d]:%d", i, data->access_one_channel_only[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] scatter_gather_enable_for_pipe[%d]:%d",
+ i, data->scatter_gather_enable_for_pipe[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] interlace_mode[%d]:%d",
+ i, data->interlace_mode[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] display_pstate_change_enable[%d]:%d",
+ i, data->display_pstate_change_enable[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bool] line_buffer_prefetch[%d]:%d", i, data->line_buffer_prefetch[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] bytes_per_pixel[%d]:%d", i, data->bytes_per_pixel[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] max_chunks_non_fbc_mode[%d]:%d",
+ i, data->max_chunks_non_fbc_mode[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] lb_bpc[%d]:%d", i, data->lb_bpc[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bpphdmi[%d]:%d", i, data->output_bpphdmi[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr[%d]:%d", i, data->output_bppdp4_lane_hbr[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr2[%d]:%d",
+ i, data->output_bppdp4_lane_hbr2[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr3[%d]:%d",
+ i, data->output_bppdp4_lane_hbr3[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines stereo_mode[%d]:%d", i, data->stereo_mode[i]);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_buffer_transfer_time[%d]:%d",
+ i, bw_fixed_to_int(data->dmif_buffer_transfer_time[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] displays_with_same_mode[%d]:%d",
+ i, bw_fixed_to_int(data->displays_with_same_mode[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_dmif_buffer_size[%d]:%d",
+ i, bw_fixed_to_int(data->stutter_dmif_buffer_size[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_refresh_duration[%d]:%d",
+ i, bw_fixed_to_int(data->stutter_refresh_duration[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_exit_watermark[%d]:%d",
+ i, bw_fixed_to_int(data->stutter_exit_watermark[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_entry_watermark[%d]:%d",
+ i, bw_fixed_to_int(data->stutter_entry_watermark[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_total[%d]:%d", i, bw_fixed_to_int(data->h_total[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_total[%d]:%d", i, bw_fixed_to_int(data->v_total[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pixel_rate[%d]:%d", i, bw_fixed_to_int(data->pixel_rate[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width[%d]:%d", i, bw_fixed_to_int(data->src_width[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pitch_in_pixels[%d]:%d",
+ i, bw_fixed_to_int(data->pitch_in_pixels[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pitch_in_pixels_after_surface_type[%d]:%d",
+ i, bw_fixed_to_int(data->pitch_in_pixels_after_surface_type[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height[%d]:%d", i, bw_fixed_to_int(data->src_height[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scale_ratio[%d]:%d", i, bw_fixed_to_int(data->scale_ratio[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_taps[%d]:%d", i, bw_fixed_to_int(data->h_taps[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_taps[%d]:%d", i, bw_fixed_to_int(data->v_taps[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_scale_ratio[%d]:%d", i, bw_fixed_to_int(data->h_scale_ratio[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_scale_ratio[%d]:%d", i, bw_fixed_to_int(data->v_scale_ratio[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] rotation_angle[%d]:%d",
+ i, bw_fixed_to_int(data->rotation_angle[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] compression_rate[%d]:%d",
+ i, bw_fixed_to_int(data->compression_rate[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr[%d]:%d", i, bw_fixed_to_int(data->hsr[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr[%d]:%d", i, bw_fixed_to_int(data->vsr[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_rounded_up_to_chunks[%d]:%d",
+ i, bw_fixed_to_int(data->source_width_rounded_up_to_chunks[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_pixels[%d]:%d",
+ i, bw_fixed_to_int(data->source_width_pixels[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_height_rounded_up_to_chunks[%d]:%d",
+ i, bw_fixed_to_int(data->source_height_rounded_up_to_chunks[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_bandwidth[%d]:%d",
+ i, bw_fixed_to_int(data->display_bandwidth[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] request_bandwidth[%d]:%d",
+ i, bw_fixed_to_int(data->request_bandwidth[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] bytes_per_request[%d]:%d",
+ i, bw_fixed_to_int(data->bytes_per_request[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] useful_bytes_per_request[%d]:%d",
+ i, bw_fixed_to_int(data->useful_bytes_per_request[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lines_interleaved_in_mem_access[%d]:%d",
+ i, bw_fixed_to_int(data->lines_interleaved_in_mem_access[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_hiding_lines[%d]:%d",
+ i, bw_fixed_to_int(data->latency_hiding_lines[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_partitions[%d]:%d",
+ i, bw_fixed_to_int(data->lb_partitions[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_partitions_max[%d]:%d",
+ i, bw_fixed_to_int(data->lb_partitions_max[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_with_ramping[%d]:%d",
+ i, bw_fixed_to_int(data->dispclk_required_with_ramping[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_without_ramping[%d]:%d",
+ i, bw_fixed_to_int(data->dispclk_required_without_ramping[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] data_buffer_size[%d]:%d",
+ i, bw_fixed_to_int(data->data_buffer_size[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] outstanding_chunk_request_limit[%d]:%d",
+ i, bw_fixed_to_int(data->outstanding_chunk_request_limit[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] urgent_watermark[%d]:%d",
+ i, bw_fixed_to_int(data->urgent_watermark[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_change_watermark[%d]:%d",
+ i, bw_fixed_to_int(data->nbp_state_change_watermark[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_filter_init[%d]:%d", i, bw_fixed_to_int(data->v_filter_init[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_cycle_duration[%d]:%d",
+ i, bw_fixed_to_int(data->stutter_cycle_duration[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] average_bandwidth[%d]:%d",
+ i, bw_fixed_to_int(data->average_bandwidth[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] average_bandwidth_no_compression[%d]:%d",
+ i, bw_fixed_to_int(data->average_bandwidth_no_compression[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_request_limit[%d]:%d",
+ i, bw_fixed_to_int(data->scatter_gather_pte_request_limit[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_size_per_component[%d]:%d",
+ i, bw_fixed_to_int(data->lb_size_per_component[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] memory_chunk_size_in_bytes[%d]:%d",
+ i, bw_fixed_to_int(data->memory_chunk_size_in_bytes[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pipe_chunk_size_in_bytes[%d]:%d",
+ i, bw_fixed_to_int(data->pipe_chunk_size_in_bytes[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] number_of_trips_to_memory_for_getting_apte_row[%d]:%d",
+ i, bw_fixed_to_int(data->number_of_trips_to_memory_for_getting_apte_row[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] adjusted_data_buffer_size[%d]:%d",
+ i, bw_fixed_to_int(data->adjusted_data_buffer_size[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] adjusted_data_buffer_size_in_memory[%d]:%d",
+ i, bw_fixed_to_int(data->adjusted_data_buffer_size_in_memory[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pixels_per_data_fifo_entry[%d]:%d",
+ i, bw_fixed_to_int(data->pixels_per_data_fifo_entry[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_requests_in_row[%d]:%d",
+ i, bw_fixed_to_int(data->scatter_gather_pte_requests_in_row[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pte_request_per_chunk[%d]:%d",
+ i, bw_fixed_to_int(data->pte_request_per_chunk[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_page_width[%d]:%d",
+ i, bw_fixed_to_int(data->scatter_gather_page_width[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_page_height[%d]:%d",
+ i, bw_fixed_to_int(data->scatter_gather_page_height[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_lines_in_per_line_out_in_beginning_of_frame[%d]:%d",
+ i, bw_fixed_to_int(data->lb_lines_in_per_line_out_in_beginning_of_frame[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_lines_in_per_line_out_in_middle_of_frame[%d]:%d",
+ i, bw_fixed_to_int(data->lb_lines_in_per_line_out_in_middle_of_frame[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_width_pixels[%d]:%d",
+ i, bw_fixed_to_int(data->cursor_width_pixels[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_latency_hiding[%d]:%d",
+ i, bw_fixed_to_int(data->minimum_latency_hiding[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_latency_hiding[%d]:%d",
+ i, bw_fixed_to_int(data->maximum_latency_hiding[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_latency_hiding_with_cursor[%d]:%d",
+ i, bw_fixed_to_int(data->minimum_latency_hiding_with_cursor[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_latency_hiding_with_cursor[%d]:%d",
+ i, bw_fixed_to_int(data->maximum_latency_hiding_with_cursor[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_pixels_for_first_output_pixel[%d]:%d",
+ i, bw_fixed_to_int(data->src_pixels_for_first_output_pixel[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_pixels_for_last_output_pixel[%d]:%d",
+ i, bw_fixed_to_int(data->src_pixels_for_last_output_pixel[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_data_for_first_output_pixel[%d]:%d",
+ i, bw_fixed_to_int(data->src_data_for_first_output_pixel[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_data_for_last_output_pixel[%d]:%d",
+ i, bw_fixed_to_int(data->src_data_for_last_output_pixel[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] active_time[%d]:%d", i, bw_fixed_to_int(data->active_time[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] horizontal_blank_and_chunk_granularity_factor[%d]:%d",
+ i, bw_fixed_to_int(data->horizontal_blank_and_chunk_granularity_factor[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_latency_hiding[%d]:%d",
+ i, bw_fixed_to_int(data->cursor_latency_hiding[i]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_blank_dram_speed_change_margin[%d]:%d",
+ i, bw_fixed_to_int(data->v_blank_dram_speed_change_margin[i]));
+ }
+
+ for (i = 0; i < maximum_number_of_surfaces; i++) {
+ for (j = 0; j < 3; j++) {
+ for (k = 0; k < 8; k++) {
+
+ DC_LOG_BANDWIDTH_CALCS("\n [bw_fixed] line_source_transfer_time[%d][%d][%d]:%d",
+ i, j, k, bw_fixed_to_int(data->line_source_transfer_time[i][j][k]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_speed_change_line_source_transfer_time[%d][%d][%d]:%d",
+ i, j, k,
+ bw_fixed_to_int(data->dram_speed_change_line_source_transfer_time[i][j][k]));
+ }
+ }
+ }
+
+ for (i = 0; i < 3; i++) {
+ for (j = 0; j < 8; j++) {
+
+ DC_LOG_BANDWIDTH_CALCS("\n [uint32_t] num_displays_with_margin[%d][%d]:%d",
+ i, j, data->num_displays_with_margin[i][j]);
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_burst_time[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->dmif_burst_time[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_burst_time[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->mcifwr_burst_time[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_dram_speed_change_margin[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->min_dram_speed_change_margin[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_dram_speed_change[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->dispclk_required_for_dram_speed_change[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_duration_margin[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->blackout_duration_margin[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_blackout_duration[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->dispclk_required_for_blackout_duration[i][j]));
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_blackout_recovery[%d][%d]:%d",
+ i, j, bw_fixed_to_int(data->dispclk_required_for_blackout_recovery[i][j]));
+ }
+ }
+
+ for (i = 0; i < 6; i++) {
+ DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_sclk_for_urgent_latency[%d]:%d",
+ i, bw_fixed_to_int(data->dmif_required_sclk_for_urgent_latency[i]));
+ }
+}
+;
+
+#endif /* _CALCS_CALCS_LOGGER_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c b/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
index 7243c37f569e..31d167bc548f 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
@@ -36,41 +36,41 @@ static bool build_custom_float(
uint32_t exp_offset = (1 << (format->exponenta_bits - 1)) - 1;
const struct fixed31_32 mantissa_constant_plus_max_fraction =
- dal_fixed31_32_from_fraction(
+ dc_fixpt_from_fraction(
(1LL << (format->mantissa_bits + 1)) - 1,
1LL << format->mantissa_bits);
struct fixed31_32 mantiss;
- if (dal_fixed31_32_eq(
+ if (dc_fixpt_eq(
value,
- dal_fixed31_32_zero)) {
+ dc_fixpt_zero)) {
*negative = false;
*mantissa = 0;
*exponenta = 0;
return true;
}
- if (dal_fixed31_32_lt(
+ if (dc_fixpt_lt(
value,
- dal_fixed31_32_zero)) {
+ dc_fixpt_zero)) {
*negative = format->sign;
- value = dal_fixed31_32_neg(value);
+ value = dc_fixpt_neg(value);
} else {
*negative = false;
}
- if (dal_fixed31_32_lt(
+ if (dc_fixpt_lt(
value,
- dal_fixed31_32_one)) {
+ dc_fixpt_one)) {
uint32_t i = 1;
do {
- value = dal_fixed31_32_shl(value, 1);
+ value = dc_fixpt_shl(value, 1);
++i;
- } while (dal_fixed31_32_lt(
+ } while (dc_fixpt_lt(
value,
- dal_fixed31_32_one));
+ dc_fixpt_one));
--i;
@@ -81,15 +81,15 @@ static bool build_custom_float(
}
*exponenta = exp_offset - i;
- } else if (dal_fixed31_32_le(
+ } else if (dc_fixpt_le(
mantissa_constant_plus_max_fraction,
value)) {
uint32_t i = 1;
do {
- value = dal_fixed31_32_shr(value, 1);
+ value = dc_fixpt_shr(value, 1);
++i;
- } while (dal_fixed31_32_lt(
+ } while (dc_fixpt_lt(
mantissa_constant_plus_max_fraction,
value));
@@ -98,23 +98,23 @@ static bool build_custom_float(
*exponenta = exp_offset;
}
- mantiss = dal_fixed31_32_sub(
+ mantiss = dc_fixpt_sub(
value,
- dal_fixed31_32_one);
+ dc_fixpt_one);
- if (dal_fixed31_32_lt(
+ if (dc_fixpt_lt(
mantiss,
- dal_fixed31_32_zero) ||
- dal_fixed31_32_lt(
- dal_fixed31_32_one,
+ dc_fixpt_zero) ||
+ dc_fixpt_lt(
+ dc_fixpt_one,
mantiss))
- mantiss = dal_fixed31_32_zero;
+ mantiss = dc_fixpt_zero;
else
- mantiss = dal_fixed31_32_shl(
+ mantiss = dc_fixpt_shl(
mantiss,
format->mantissa_bits);
- *mantissa = dal_fixed31_32_floor(mantiss);
+ *mantissa = dc_fixpt_floor(mantiss);
return true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
index 0cbab81ab304..2c4e8f0cb2dc 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
@@ -28,6 +28,7 @@
#include "dc.h"
#include "core_types.h"
#include "dal_asic_id.h"
+#include "calcs_logger.h"
/*
* NOTE:
@@ -52,11 +53,14 @@ static enum bw_calcs_version bw_calcs_version_from_asic_id(struct hw_asic_id asi
return BW_CALCS_VERSION_CARRIZO;
case FAMILY_VI:
+ if (ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev))
+ return BW_CALCS_VERSION_POLARIS12;
if (ASIC_REV_IS_POLARIS10_P(asic_id.hw_internal_rev))
return BW_CALCS_VERSION_POLARIS10;
- if (ASIC_REV_IS_POLARIS11_M(asic_id.hw_internal_rev) ||
- ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev))
+ if (ASIC_REV_IS_POLARIS11_M(asic_id.hw_internal_rev))
return BW_CALCS_VERSION_POLARIS11;
+ if (ASIC_REV_IS_VEGAM(asic_id.hw_internal_rev))
+ return BW_CALCS_VERSION_VEGAM;
return BW_CALCS_VERSION_INVALID;
case FAMILY_AI:
@@ -2145,6 +2149,9 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip,
dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0); /* todo: this is a bug*/
break;
case BW_CALCS_VERSION_POLARIS10:
+ /* TODO: Treat VEGAM the same as P10 for now
+ * Need to tune the para for VEGAM if needed */
+ case BW_CALCS_VERSION_VEGAM:
vbios.memory_type = bw_def_gddr5;
vbios.dram_channel_width_in_bits = 32;
vbios.number_of_dram_channels = asic_id.vram_width / vbios.dram_channel_width_in_bits;
@@ -2373,6 +2380,122 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip,
dceip.scatter_gather_pte_request_rows_in_tiling_mode = 2;
dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0);
break;
+ case BW_CALCS_VERSION_POLARIS12:
+ vbios.memory_type = bw_def_gddr5;
+ vbios.dram_channel_width_in_bits = 32;
+ vbios.number_of_dram_channels = asic_id.vram_width / vbios.dram_channel_width_in_bits;
+ vbios.number_of_dram_banks = 8;
+ vbios.high_yclk = bw_int_to_fixed(6000);
+ vbios.mid_yclk = bw_int_to_fixed(3200);
+ vbios.low_yclk = bw_int_to_fixed(1000);
+ vbios.low_sclk = bw_int_to_fixed(678);
+ vbios.mid1_sclk = bw_int_to_fixed(864);
+ vbios.mid2_sclk = bw_int_to_fixed(900);
+ vbios.mid3_sclk = bw_int_to_fixed(920);
+ vbios.mid4_sclk = bw_int_to_fixed(940);
+ vbios.mid5_sclk = bw_int_to_fixed(960);
+ vbios.mid6_sclk = bw_int_to_fixed(980);
+ vbios.high_sclk = bw_int_to_fixed(1049);
+ vbios.low_voltage_max_dispclk = bw_int_to_fixed(459);
+ vbios.mid_voltage_max_dispclk = bw_int_to_fixed(654);
+ vbios.high_voltage_max_dispclk = bw_int_to_fixed(1108);
+ vbios.low_voltage_max_phyclk = bw_int_to_fixed(540);
+ vbios.mid_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios.high_voltage_max_phyclk = bw_int_to_fixed(810);
+ vbios.data_return_bus_width = bw_int_to_fixed(32);
+ vbios.trc = bw_int_to_fixed(48);
+ if (vbios.number_of_dram_channels == 2) // 64-bit
+ vbios.dmifmc_urgent_latency = bw_int_to_fixed(4);
+ else
+ vbios.dmifmc_urgent_latency = bw_int_to_fixed(3);
+ vbios.stutter_self_refresh_exit_latency = bw_int_to_fixed(5);
+ vbios.stutter_self_refresh_entry_latency = bw_int_to_fixed(0);
+ vbios.nbp_state_change_latency = bw_int_to_fixed(250);
+ vbios.mcifwrmc_urgent_latency = bw_int_to_fixed(10);
+ vbios.scatter_gather_enable = false;
+ vbios.down_spread_percentage = bw_frc_to_fixed(5, 10);
+ vbios.cursor_width = 32;
+ vbios.average_compression_rate = 4;
+ vbios.number_of_request_slots_gmc_reserves_for_dmif_per_channel = 256;
+ vbios.blackout_duration = bw_int_to_fixed(0); /* us */
+ vbios.maximum_blackout_recovery_time = bw_int_to_fixed(0);
+
+ dceip.max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation = 100;
+ dceip.max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation = 100;
+ dceip.percent_of_ideal_port_bw_received_after_urgent_latency = 100;
+ dceip.large_cursor = false;
+ dceip.dmif_request_buffer_size = bw_int_to_fixed(768);
+ dceip.dmif_pipe_en_fbc_chunk_tracker = false;
+ dceip.cursor_max_outstanding_group_num = 1;
+ dceip.lines_interleaved_into_lb = 2;
+ dceip.chunk_width = 256;
+ dceip.number_of_graphics_pipes = 5;
+ dceip.number_of_underlay_pipes = 0;
+ dceip.low_power_tiling_mode = 0;
+ dceip.display_write_back_supported = true;
+ dceip.argb_compression_support = true;
+ dceip.underlay_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35556, 10000);
+ dceip.underlay_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip.underlay_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip.underlay_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip.graphics_vscaler_efficiency6_bit_per_component =
+ bw_frc_to_fixed(35, 10);
+ dceip.graphics_vscaler_efficiency8_bit_per_component =
+ bw_frc_to_fixed(34286, 10000);
+ dceip.graphics_vscaler_efficiency10_bit_per_component =
+ bw_frc_to_fixed(32, 10);
+ dceip.graphics_vscaler_efficiency12_bit_per_component =
+ bw_int_to_fixed(3);
+ dceip.alpha_vscaler_efficiency = bw_int_to_fixed(3);
+ dceip.max_dmif_buffer_allocated = 4;
+ dceip.graphics_dmif_size = 12288;
+ dceip.underlay_luma_dmif_size = 19456;
+ dceip.underlay_chroma_dmif_size = 23552;
+ dceip.pre_downscaler_enabled = true;
+ dceip.underlay_downscale_prefetch_enabled = true;
+ dceip.lb_write_pixels_per_dispclk = bw_int_to_fixed(1);
+ dceip.lb_size_per_component444 = bw_int_to_fixed(245952);
+ dceip.graphics_lb_nodownscaling_multi_line_prefetching = true;
+ dceip.stutter_and_dram_clock_state_change_gated_before_cursor =
+ bw_int_to_fixed(1);
+ dceip.underlay420_luma_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip.underlay420_chroma_lb_size_per_component =
+ bw_int_to_fixed(164352);
+ dceip.underlay422_lb_size_per_component = bw_int_to_fixed(
+ 82176);
+ dceip.cursor_chunk_width = bw_int_to_fixed(64);
+ dceip.cursor_dcp_buffer_lines = bw_int_to_fixed(4);
+ dceip.underlay_maximum_width_efficient_for_tiling =
+ bw_int_to_fixed(1920);
+ dceip.underlay_maximum_height_efficient_for_tiling =
+ bw_int_to_fixed(1080);
+ dceip.peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display =
+ bw_frc_to_fixed(3, 10);
+ dceip.peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation =
+ bw_int_to_fixed(25);
+ dceip.minimum_outstanding_pte_request_limit = bw_int_to_fixed(
+ 2);
+ dceip.maximum_total_outstanding_pte_requests_allowed_by_saw =
+ bw_int_to_fixed(128);
+ dceip.limit_excessive_outstanding_dmif_requests = true;
+ dceip.linear_mode_line_request_alternation_slice =
+ bw_int_to_fixed(64);
+ dceip.scatter_gather_lines_of_pte_prefetching_in_linear_mode =
+ 32;
+ dceip.display_write_back420_luma_mcifwr_buffer_size = 12288;
+ dceip.display_write_back420_chroma_mcifwr_buffer_size = 8192;
+ dceip.request_efficiency = bw_frc_to_fixed(8, 10);
+ dceip.dispclk_per_request = bw_int_to_fixed(2);
+ dceip.dispclk_ramping_factor = bw_frc_to_fixed(105, 100);
+ dceip.display_pipe_throughput_factor = bw_frc_to_fixed(105, 100);
+ dceip.scatter_gather_pte_request_rows_in_tiling_mode = 2;
+ dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0);
+ break;
case BW_CALCS_VERSION_STONEY:
vbios.memory_type = bw_def_gddr5;
vbios.dram_channel_width_in_bits = 64;
@@ -2815,6 +2938,19 @@ static void populate_initial_data(
data->bytes_per_pixel[num_displays + 4] = 4;
break;
}
+ } else if (pipe[i].stream->dst.width != 0 &&
+ pipe[i].stream->dst.height != 0 &&
+ pipe[i].stream->src.width != 0 &&
+ pipe[i].stream->src.height != 0) {
+ data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->src.width);
+ data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4];
+ data->src_height[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->src.height);
+ data->h_taps[num_displays + 4] = pipe[i].stream->src.width == pipe[i].stream->dst.width ? bw_int_to_fixed(1) : bw_int_to_fixed(2);
+ data->v_taps[num_displays + 4] = pipe[i].stream->src.height == pipe[i].stream->dst.height ? bw_int_to_fixed(1) : bw_int_to_fixed(2);
+ data->h_scale_ratio[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->src.width, pipe[i].stream->dst.width);
+ data->v_scale_ratio[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->src.height, pipe[i].stream->dst.height);
+ data->rotation_angle[num_displays + 4] = bw_int_to_fixed(0);
+ data->bytes_per_pixel[num_displays + 4] = 4;
} else {
data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.h_addressable);
data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4];
@@ -2873,6 +3009,11 @@ bool bw_calcs(struct dc_context *ctx,
struct bw_fixed mid_yclk = vbios->mid_yclk;
struct bw_fixed low_yclk = vbios->low_yclk;
+ if (ctx->dc->debug.bandwidth_calcs_trace) {
+ print_bw_calcs_dceip(ctx->logger, dceip);
+ print_bw_calcs_vbios(ctx->logger, vbios);
+ print_bw_calcs_data(ctx->logger, data);
+ }
calculate_bandwidth(dceip, vbios, data);
yclk_lvl = data->y_clk_level;
@@ -2968,7 +3109,33 @@ bool bw_calcs(struct dc_context *ctx,
bw_fixed_to_int(bw_mul(data->
stutter_exit_watermark[9], bw_int_to_fixed(1000)));
-
+ calcs_output->stutter_entry_wm_ns[0].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[1].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[2].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_entry_wm_ns[3].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_entry_wm_ns[3].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_entry_wm_ns[5].a_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[9], bw_int_to_fixed(1000)));
calcs_output->urgent_wm_ns[0].a_mark =
bw_fixed_to_int(bw_mul(data->
@@ -3063,7 +3230,33 @@ bool bw_calcs(struct dc_context *ctx,
bw_fixed_to_int(bw_mul(data->
stutter_exit_watermark[9], bw_int_to_fixed(1000)));
-
+ calcs_output->stutter_entry_wm_ns[0].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[1].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[2].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_entry_wm_ns[3].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_entry_wm_ns[3].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_entry_wm_ns[5].b_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[9], bw_int_to_fixed(1000)));
calcs_output->urgent_wm_ns[0].b_mark =
bw_fixed_to_int(bw_mul(data->
@@ -3156,6 +3349,34 @@ bool bw_calcs(struct dc_context *ctx,
bw_fixed_to_int(bw_mul(data->
stutter_exit_watermark[9], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[0].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[1].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[2].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_entry_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_entry_wm_ns[3].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_entry_wm_ns[5].c_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[9], bw_int_to_fixed(1000)));
+
calcs_output->urgent_wm_ns[0].c_mark =
bw_fixed_to_int(bw_mul(data->
urgent_watermark[4], bw_int_to_fixed(1000)));
@@ -3260,6 +3481,33 @@ bool bw_calcs(struct dc_context *ctx,
bw_fixed_to_int(bw_mul(data->
stutter_exit_watermark[9], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[0].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[1].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[2].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+ if (ctx->dc->caps.max_slave_planes) {
+ calcs_output->stutter_entry_wm_ns[3].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[0], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[1], bw_int_to_fixed(1000)));
+ } else {
+ calcs_output->stutter_entry_wm_ns[3].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[7], bw_int_to_fixed(1000)));
+ calcs_output->stutter_entry_wm_ns[4].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[8], bw_int_to_fixed(1000)));
+ }
+ calcs_output->stutter_entry_wm_ns[5].d_mark =
+ bw_fixed_to_int(bw_mul(data->
+ stutter_entry_watermark[9], bw_int_to_fixed(1000)));
calcs_output->urgent_wm_ns[0].d_mark =
bw_fixed_to_int(bw_mul(data->
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index 4bb43a371292..49a4ea45466d 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -873,14 +873,14 @@ bool dcn_validate_bandwidth(
}
if (pipe->plane_state->rotation % 2 == 0) {
- ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value
+ ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dc_fixpt_one.value
|| v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]);
- ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value
+ ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dc_fixpt_one.value
|| v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]);
} else {
- ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value
+ ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dc_fixpt_one.value
|| v->scaler_recout_height[input_idx] == v->viewport_width[input_idx]);
- ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value
+ ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dc_fixpt_one.value
|| v->scaler_rec_out_width[input_idx] == v->viewport_height[input_idx]);
}
v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no;
@@ -1459,39 +1459,39 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc)
void dcn_bw_sync_calcs_and_dml(struct dc *dc)
{
kernel_fpu_begin();
- DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %d ns\n"
- "sr_enter_plus_exit_time: %d ns\n"
- "urgent_latency: %d ns\n"
- "write_back_latency: %d ns\n"
- "percent_of_ideal_drambw_received_after_urg_latency: %d %\n"
+ DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n"
+ "sr_enter_plus_exit_time: %f ns\n"
+ "urgent_latency: %f ns\n"
+ "write_back_latency: %f ns\n"
+ "percent_of_ideal_drambw_received_after_urg_latency: %f %%\n"
"max_request_size: %d bytes\n"
- "dcfclkv_max0p9: %d kHz\n"
- "dcfclkv_nom0p8: %d kHz\n"
- "dcfclkv_mid0p72: %d kHz\n"
- "dcfclkv_min0p65: %d kHz\n"
- "max_dispclk_vmax0p9: %d kHz\n"
- "max_dispclk_vnom0p8: %d kHz\n"
- "max_dispclk_vmid0p72: %d kHz\n"
- "max_dispclk_vmin0p65: %d kHz\n"
- "max_dppclk_vmax0p9: %d kHz\n"
- "max_dppclk_vnom0p8: %d kHz\n"
- "max_dppclk_vmid0p72: %d kHz\n"
- "max_dppclk_vmin0p65: %d kHz\n"
- "socclk: %d kHz\n"
- "fabric_and_dram_bandwidth_vmax0p9: %d MB/s\n"
- "fabric_and_dram_bandwidth_vnom0p8: %d MB/s\n"
- "fabric_and_dram_bandwidth_vmid0p72: %d MB/s\n"
- "fabric_and_dram_bandwidth_vmin0p65: %d MB/s\n"
- "phyclkv_max0p9: %d kHz\n"
- "phyclkv_nom0p8: %d kHz\n"
- "phyclkv_mid0p72: %d kHz\n"
- "phyclkv_min0p65: %d kHz\n"
- "downspreading: %d %\n"
+ "dcfclkv_max0p9: %f kHz\n"
+ "dcfclkv_nom0p8: %f kHz\n"
+ "dcfclkv_mid0p72: %f kHz\n"
+ "dcfclkv_min0p65: %f kHz\n"
+ "max_dispclk_vmax0p9: %f kHz\n"
+ "max_dispclk_vnom0p8: %f kHz\n"
+ "max_dispclk_vmid0p72: %f kHz\n"
+ "max_dispclk_vmin0p65: %f kHz\n"
+ "max_dppclk_vmax0p9: %f kHz\n"
+ "max_dppclk_vnom0p8: %f kHz\n"
+ "max_dppclk_vmid0p72: %f kHz\n"
+ "max_dppclk_vmin0p65: %f kHz\n"
+ "socclk: %f kHz\n"
+ "fabric_and_dram_bandwidth_vmax0p9: %f MB/s\n"
+ "fabric_and_dram_bandwidth_vnom0p8: %f MB/s\n"
+ "fabric_and_dram_bandwidth_vmid0p72: %f MB/s\n"
+ "fabric_and_dram_bandwidth_vmin0p65: %f MB/s\n"
+ "phyclkv_max0p9: %f kHz\n"
+ "phyclkv_nom0p8: %f kHz\n"
+ "phyclkv_mid0p72: %f kHz\n"
+ "phyclkv_min0p65: %f kHz\n"
+ "downspreading: %f %%\n"
"round_trip_ping_latency_cycles: %d DCFCLK Cycles\n"
"urgent_out_of_order_return_per_channel: %d Bytes\n"
"number_of_channels: %d\n"
"vmm_page_size: %d Bytes\n"
- "dram_clock_change_latency: %d ns\n"
+ "dram_clock_change_latency: %f ns\n"
"return_bus_width: %d Bytes\n",
dc->dcn_soc->sr_exit_time * 1000,
dc->dcn_soc->sr_enter_plus_exit_time * 1000,
@@ -1527,11 +1527,11 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc)
dc->dcn_soc->vmm_page_size,
dc->dcn_soc->dram_clock_change_latency * 1000,
dc->dcn_soc->return_bus_width);
- DC_LOG_BANDWIDTH_CALCS("rob_buffer_size_in_kbyte: %d\n"
- "det_buffer_size_in_kbyte: %d\n"
- "dpp_output_buffer_pixels: %d\n"
- "opp_output_buffer_lines: %d\n"
- "pixel_chunk_size_in_kbyte: %d\n"
+ DC_LOG_BANDWIDTH_CALCS("rob_buffer_size_in_kbyte: %f\n"
+ "det_buffer_size_in_kbyte: %f\n"
+ "dpp_output_buffer_pixels: %f\n"
+ "opp_output_buffer_lines: %f\n"
+ "pixel_chunk_size_in_kbyte: %f\n"
"pte_enable: %d\n"
"pte_chunk_size: %d kbytes\n"
"meta_chunk_size: %d kbytes\n"
@@ -1550,13 +1550,13 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc)
"max_pscl_tolb_throughput: %d pixels/dppclk\n"
"max_lb_tovscl_throughput: %d pixels/dppclk\n"
"max_vscl_tohscl_throughput: %d pixels/dppclk\n"
- "max_hscl_ratio: %d\n"
- "max_vscl_ratio: %d\n"
+ "max_hscl_ratio: %f\n"
+ "max_vscl_ratio: %f\n"
"max_hscl_taps: %d\n"
"max_vscl_taps: %d\n"
"pte_buffer_size_in_requests: %d\n"
- "dispclk_ramping_margin: %d %\n"
- "under_scan_factor: %d %\n"
+ "dispclk_ramping_margin: %f %%\n"
+ "under_scan_factor: %f %%\n"
"max_inter_dcn_tile_repeaters: %d\n"
"can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one: %d\n"
"bug_forcing_luma_and_chroma_request_to_same_size_fixed: %d\n"
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 9cd3566def8d..644b2187507b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -936,95 +936,6 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc)
return true;
}
-/*
- * TODO this whole function needs to go
- *
- * dc_surface_update is needlessly complex. See if we can just replace this
- * with a dc_plane_state and follow the atomic model a bit more closely here.
- */
-bool dc_commit_planes_to_stream(
- struct dc *dc,
- struct dc_plane_state **plane_states,
- uint8_t new_plane_count,
- struct dc_stream_state *dc_stream,
- struct dc_state *state)
-{
- /* no need to dynamically allocate this. it's pretty small */
- struct dc_surface_update updates[MAX_SURFACES];
- struct dc_flip_addrs *flip_addr;
- struct dc_plane_info *plane_info;
- struct dc_scaling_info *scaling_info;
- int i;
- struct dc_stream_update *stream_update =
- kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL);
-
- if (!stream_update) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- flip_addr = kcalloc(MAX_SURFACES, sizeof(struct dc_flip_addrs),
- GFP_KERNEL);
- plane_info = kcalloc(MAX_SURFACES, sizeof(struct dc_plane_info),
- GFP_KERNEL);
- scaling_info = kcalloc(MAX_SURFACES, sizeof(struct dc_scaling_info),
- GFP_KERNEL);
-
- if (!flip_addr || !plane_info || !scaling_info) {
- kfree(flip_addr);
- kfree(plane_info);
- kfree(scaling_info);
- kfree(stream_update);
- return false;
- }
-
- memset(updates, 0, sizeof(updates));
-
- stream_update->src = dc_stream->src;
- stream_update->dst = dc_stream->dst;
- stream_update->out_transfer_func = dc_stream->out_transfer_func;
-
- for (i = 0; i < new_plane_count; i++) {
- updates[i].surface = plane_states[i];
- updates[i].gamma =
- (struct dc_gamma *)plane_states[i]->gamma_correction;
- updates[i].in_transfer_func = plane_states[i]->in_transfer_func;
- flip_addr[i].address = plane_states[i]->address;
- flip_addr[i].flip_immediate = plane_states[i]->flip_immediate;
- plane_info[i].color_space = plane_states[i]->color_space;
- plane_info[i].input_tf = plane_states[i]->input_tf;
- plane_info[i].format = plane_states[i]->format;
- plane_info[i].plane_size = plane_states[i]->plane_size;
- plane_info[i].rotation = plane_states[i]->rotation;
- plane_info[i].horizontal_mirror = plane_states[i]->horizontal_mirror;
- plane_info[i].stereo_format = plane_states[i]->stereo_format;
- plane_info[i].tiling_info = plane_states[i]->tiling_info;
- plane_info[i].visible = plane_states[i]->visible;
- plane_info[i].per_pixel_alpha = plane_states[i]->per_pixel_alpha;
- plane_info[i].dcc = plane_states[i]->dcc;
- scaling_info[i].scaling_quality = plane_states[i]->scaling_quality;
- scaling_info[i].src_rect = plane_states[i]->src_rect;
- scaling_info[i].dst_rect = plane_states[i]->dst_rect;
- scaling_info[i].clip_rect = plane_states[i]->clip_rect;
-
- updates[i].flip_addr = &flip_addr[i];
- updates[i].plane_info = &plane_info[i];
- updates[i].scaling_info = &scaling_info[i];
- }
-
- dc_commit_updates_for_stream(
- dc,
- updates,
- new_plane_count,
- dc_stream, stream_update, plane_states, state);
-
- kfree(flip_addr);
- kfree(plane_info);
- kfree(scaling_info);
- kfree(stream_update);
- return true;
-}
-
struct dc_state *dc_create_state(void)
{
struct dc_state *context = kzalloc(sizeof(struct dc_state),
@@ -1107,9 +1018,6 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
if (u->plane_info->color_space != u->surface->color_space)
update_flags->bits.color_space_change = 1;
- if (u->plane_info->input_tf != u->surface->input_tf)
- update_flags->bits.input_tf_change = 1;
-
if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror)
update_flags->bits.horizontal_mirror_change = 1;
@@ -1243,12 +1151,20 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
if (u->input_csc_color_matrix)
update_flags->bits.input_csc_change = 1;
- if (update_flags->bits.in_transfer_func_change
- || update_flags->bits.input_csc_change) {
+ if (u->coeff_reduction_factor)
+ update_flags->bits.coeff_reduction_change = 1;
+
+ if (update_flags->bits.in_transfer_func_change) {
type = UPDATE_TYPE_MED;
elevate_update_type(&overall_type, type);
}
+ if (update_flags->bits.input_csc_change
+ || update_flags->bits.coeff_reduction_change) {
+ type = UPDATE_TYPE_FULL;
+ elevate_update_type(&overall_type, type);
+ }
+
return overall_type;
}
@@ -1297,7 +1213,7 @@ enum surface_update_type dc_check_update_surfaces_for_stream(
type = check_update_surfaces_for_stream(dc, updates, surface_count, stream_update, stream_status);
if (type == UPDATE_TYPE_FULL)
for (i = 0; i < surface_count; i++)
- updates[i].surface->update_flags.bits.full_update = 1;
+ updates[i].surface->update_flags.raw = 0xFFFFFFFF;
return type;
}
@@ -1375,6 +1291,12 @@ static void commit_planes_for_stream(struct dc *dc,
pipe_ctx->stream_res.abm->funcs->set_abm_level(
pipe_ctx->stream_res.abm, stream->abm_level);
}
+
+ if (stream_update && stream_update->periodic_fn_vsync_delta &&
+ pipe_ctx->stream_res.tg->funcs->program_vline_interrupt)
+ pipe_ctx->stream_res.tg->funcs->program_vline_interrupt(
+ pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing,
+ pipe_ctx->stream->periodic_fn_vsync_delta);
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
index 5a552cb3f8a7..267c76766dea 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
@@ -36,8 +36,9 @@
#include "hw_sequencer.h"
#include "resource.h"
-#define DC_LOGGER \
- logger
+
+#define DC_LOGGER_INIT(logger)
+
#define SURFACE_TRACE(...) do {\
if (dc->debug.surface_trace) \
@@ -60,8 +61,7 @@ void pre_surface_trace(
int surface_count)
{
int i;
- struct dc *core_dc = dc;
- struct dal_logger *logger = core_dc->ctx->logger;
+ DC_LOGGER_INIT(dc->ctx->logger);
for (i = 0; i < surface_count; i++) {
const struct dc_plane_state *plane_state = plane_states[i];
@@ -72,8 +72,8 @@ void pre_surface_trace(
"plane_state->visible = %d;\n"
"plane_state->flip_immediate = %d;\n"
"plane_state->address.type = %d;\n"
- "plane_state->address.grph.addr.quad_part = 0x%X;\n"
- "plane_state->address.grph.meta_addr.quad_part = 0x%X;\n"
+ "plane_state->address.grph.addr.quad_part = 0x%llX;\n"
+ "plane_state->address.grph.meta_addr.quad_part = 0x%llX;\n"
"plane_state->scaling_quality.h_taps = %d;\n"
"plane_state->scaling_quality.v_taps = %d;\n"
"plane_state->scaling_quality.h_taps_c = %d;\n"
@@ -155,7 +155,6 @@ void pre_surface_trace(
"plane_state->tiling_info.gfx8.pipe_config = %d;\n"
"plane_state->tiling_info.gfx8.array_mode = %d;\n"
"plane_state->color_space = %d;\n"
- "plane_state->input_tf = %d;\n"
"plane_state->dcc.enable = %d;\n"
"plane_state->format = %d;\n"
"plane_state->rotation = %d;\n"
@@ -163,7 +162,6 @@ void pre_surface_trace(
plane_state->tiling_info.gfx8.pipe_config,
plane_state->tiling_info.gfx8.array_mode,
plane_state->color_space,
- plane_state->input_tf,
plane_state->dcc.enable,
plane_state->format,
plane_state->rotation,
@@ -183,8 +181,7 @@ void update_surface_trace(
int surface_count)
{
int i;
- struct dc *core_dc = dc;
- struct dal_logger *logger = core_dc->ctx->logger;
+ DC_LOGGER_INIT(dc->ctx->logger);
for (i = 0; i < surface_count; i++) {
const struct dc_surface_update *update = &updates[i];
@@ -192,8 +189,8 @@ void update_surface_trace(
SURFACE_TRACE("Update %d\n", i);
if (update->flip_addr) {
SURFACE_TRACE("flip_addr->address.type = %d;\n"
- "flip_addr->address.grph.addr.quad_part = 0x%X;\n"
- "flip_addr->address.grph.meta_addr.quad_part = 0x%X;\n"
+ "flip_addr->address.grph.addr.quad_part = 0x%llX;\n"
+ "flip_addr->address.grph.meta_addr.quad_part = 0x%llX;\n"
"flip_addr->flip_immediate = %d;\n",
update->flip_addr->address.type,
update->flip_addr->address.grph.addr.quad_part,
@@ -204,16 +201,15 @@ void update_surface_trace(
if (update->plane_info) {
SURFACE_TRACE(
"plane_info->color_space = %d;\n"
- "plane_info->input_tf = %d;\n"
"plane_info->format = %d;\n"
"plane_info->plane_size.grph.surface_pitch = %d;\n"
"plane_info->plane_size.grph.surface_size.height = %d;\n"
"plane_info->plane_size.grph.surface_size.width = %d;\n"
"plane_info->plane_size.grph.surface_size.x = %d;\n"
"plane_info->plane_size.grph.surface_size.y = %d;\n"
- "plane_info->rotation = %d;\n",
+ "plane_info->rotation = %d;\n"
+ "plane_info->stereo_format = %d;\n",
update->plane_info->color_space,
- update->plane_info->input_tf,
update->plane_info->format,
update->plane_info->plane_size.grph.surface_pitch,
update->plane_info->plane_size.grph.surface_size.height,
@@ -303,8 +299,7 @@ void update_surface_trace(
void post_surface_trace(struct dc *dc)
{
- struct dc *core_dc = dc;
- struct dal_logger *logger = core_dc->ctx->logger;
+ DC_LOGGER_INIT(dc->ctx->logger);
SURFACE_TRACE("post surface process.\n");
@@ -316,10 +311,10 @@ void context_timing_trace(
{
int i;
struct dc *core_dc = dc;
- struct dal_logger *logger = core_dc->ctx->logger;
int h_pos[MAX_PIPES], v_pos[MAX_PIPES];
struct crtc_position position;
unsigned int underlay_idx = core_dc->res_pool->underlay_pipe_index;
+ DC_LOGGER_INIT(dc->ctx->logger);
for (i = 0; i < core_dc->res_pool->pipe_count; i++) {
@@ -354,9 +349,7 @@ void context_clock_trace(
struct dc_state *context)
{
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
- struct dc *core_dc = dc;
- struct dal_logger *logger = core_dc->ctx->logger;
-
+ DC_LOGGER_INIT(dc->ctx->logger);
CLOCK_TRACE("Current: dispclk_khz:%d max_dppclk_khz:%d dcfclk_khz:%d\n"
"dcfclk_deep_sleep_khz:%d fclk_khz:%d socclk_khz:%d\n",
context->bw.dcn.calc_clk.dispclk_khz,
@@ -371,6 +364,7 @@ void context_clock_trace(
context->bw.dcn.calc_clk.dppclk_khz,
context->bw.dcn.calc_clk.dcfclk_khz,
context->bw.dcn.calc_clk.dcfclk_deep_sleep_khz,
- context->bw.dcn.calc_clk.fclk_khz);
+ context->bw.dcn.calc_clk.fclk_khz,
+ context->bw.dcn.calc_clk.socclk_khz);
#endif
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index ebc96b720083..83d121510ef5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -208,6 +208,7 @@ void color_space_to_black_color(
case COLOR_SPACE_YCBCR709:
case COLOR_SPACE_YCBCR601_LIMITED:
case COLOR_SPACE_YCBCR709_LIMITED:
+ case COLOR_SPACE_2020_YCBCR:
*black_color = black_color_format[BLACK_COLOR_FORMAT_YUV_CV];
break;
@@ -216,7 +217,25 @@ void color_space_to_black_color(
black_color_format[BLACK_COLOR_FORMAT_RGB_LIMITED];
break;
- default:
+ /**
+ * Remove default and add case for all color space
+ * so when we forget to add new color space
+ * compiler will give a warning
+ */
+ case COLOR_SPACE_UNKNOWN:
+ case COLOR_SPACE_SRGB:
+ case COLOR_SPACE_XR_RGB:
+ case COLOR_SPACE_MSREF_SCRGB:
+ case COLOR_SPACE_XV_YCC_709:
+ case COLOR_SPACE_XV_YCC_601:
+ case COLOR_SPACE_2020_RGB_FULLRANGE:
+ case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
+ case COLOR_SPACE_ADOBERGB:
+ case COLOR_SPACE_DCIP3:
+ case COLOR_SPACE_DISPLAYNATIVE:
+ case COLOR_SPACE_DOLBYVISION:
+ case COLOR_SPACE_APPCTRL:
+ case COLOR_SPACE_CUSTOMPOINTS:
/* fefault is sRGB black (full range). */
*black_color =
black_color_format[BLACK_COLOR_FORMAT_RGB_FULLRANGE];
@@ -230,6 +249,9 @@ bool hwss_wait_for_blank_complete(
{
int counter;
+ /* Not applicable if the pipe is not primary, save 300ms of boot time */
+ if (!tg->funcs->is_blanked)
+ return true;
for (counter = 0; counter < 100; counter++) {
if (tg->funcs->is_blanked(tg))
break;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 6d1c4981a185..2fa521812d23 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -45,8 +45,9 @@
#include "dce/dce_11_0_d.h"
#include "dce/dce_11_0_enum.h"
#include "dce/dce_11_0_sh_mask.h"
-#define DC_LOGGER \
- dc_ctx->logger
+
+#define DC_LOGGER_INIT(logger)
+
#define LINK_INFO(...) \
DC_LOG_HW_HOTPLUG( \
@@ -468,6 +469,13 @@ static void link_disconnect_sink(struct dc_link *link)
link->dpcd_sink_count = 0;
}
+static void link_disconnect_remap(struct dc_sink *prev_sink, struct dc_link *link)
+{
+ dc_sink_release(link->local_sink);
+ link->local_sink = prev_sink;
+}
+
+
static bool detect_dp(
struct dc_link *link,
struct display_sink_capability *sink_caps,
@@ -550,6 +558,17 @@ static bool detect_dp(
return true;
}
+static bool is_same_edid(struct dc_edid *old_edid, struct dc_edid *new_edid)
+{
+ if (old_edid->length != new_edid->length)
+ return false;
+
+ if (new_edid->length == 0)
+ return false;
+
+ return (memcmp(old_edid->raw_edid, new_edid->raw_edid, new_edid->length) == 0);
+}
+
bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
{
struct dc_sink_init_data sink_init_data = { 0 };
@@ -557,11 +576,15 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
uint8_t i;
bool converter_disable_audio = false;
struct audio_support *aud_support = &link->dc->res_pool->audio_support;
+ bool same_edid = false;
enum dc_edid_status edid_status;
struct dc_context *dc_ctx = link->ctx;
struct dc_sink *sink = NULL;
+ struct dc_sink *prev_sink = NULL;
+ struct dpcd_caps prev_dpcd_caps;
+ bool same_dpcd = true;
enum dc_connection_type new_connection_type = dc_connection_none;
-
+ DC_LOGGER_INIT(link->ctx->logger);
if (link->connector_signal == SIGNAL_TYPE_VIRTUAL)
return false;
@@ -574,6 +597,11 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
link->local_sink)
return true;
+ prev_sink = link->local_sink;
+ if (prev_sink != NULL) {
+ dc_sink_retain(prev_sink);
+ memcpy(&prev_dpcd_caps, &link->dpcd_caps, sizeof(struct dpcd_caps));
+ }
link_disconnect_sink(link);
if (new_connection_type != dc_connection_none) {
@@ -615,14 +643,25 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
link,
&sink_caps,
&converter_disable_audio,
- aud_support, reason))
+ aud_support, reason)) {
+ if (prev_sink != NULL)
+ dc_sink_release(prev_sink);
return false;
+ }
+ // Check if dpcp block is the same
+ if (prev_sink != NULL) {
+ if (memcmp(&link->dpcd_caps, &prev_dpcd_caps, sizeof(struct dpcd_caps)))
+ same_dpcd = false;
+ }
/* Active dongle downstream unplug */
if (link->type == dc_connection_active_dongle
&& link->dpcd_caps.sink_count.
- bits.SINK_COUNT == 0)
+ bits.SINK_COUNT == 0) {
+ if (prev_sink != NULL)
+ dc_sink_release(prev_sink);
return true;
+ }
if (link->type == dc_connection_mst_branch) {
LINK_INFO("link=%d, mst branch is now Connected\n",
@@ -630,9 +669,11 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
/* Need to setup mst link_cap struct here
* otherwise dc_link_detect() will leave mst link_cap
* empty which leads to allocate_mst_payload() has "0"
- * pbn_per_slot value leading to exception on dal_fixed31_32_div()
+ * pbn_per_slot value leading to exception on dc_fixpt_div()
*/
link->verified_link_cap = link->reported_link_cap;
+ if (prev_sink != NULL)
+ dc_sink_release(prev_sink);
return false;
}
@@ -642,6 +683,8 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
default:
DC_ERROR("Invalid connector type! signal:%d\n",
link->connector_signal);
+ if (prev_sink != NULL)
+ dc_sink_release(prev_sink);
return false;
} /* switch() */
@@ -664,6 +707,8 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
sink = dc_sink_create(&sink_init_data);
if (!sink) {
DC_ERROR("Failed to create sink!\n");
+ if (prev_sink != NULL)
+ dc_sink_release(prev_sink);
return false;
}
@@ -687,22 +732,33 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
break;
}
- if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
- sink_caps.transaction_type ==
- DDC_TRANSACTION_TYPE_I2C_OVER_AUX) {
- /*
- * TODO debug why Dell 2413 doesn't like
- * two link trainings
- */
+ // Check if edid is the same
+ if ((prev_sink != NULL) && ((edid_status == EDID_THE_SAME) || (edid_status == EDID_OK)))
+ same_edid = is_same_edid(&prev_sink->dc_edid, &sink->dc_edid);
- /* deal with non-mst cases */
- dp_hbr_verify_link_cap(link, &link->reported_link_cap);
- }
+ // If both edid and dpcd are the same, then discard new sink and revert back to original sink
+ if ((same_edid) && (same_dpcd)) {
+ link_disconnect_remap(prev_sink, link);
+ sink = prev_sink;
+ prev_sink = NULL;
+ } else {
+ if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
+ sink_caps.transaction_type ==
+ DDC_TRANSACTION_TYPE_I2C_OVER_AUX) {
+ /*
+ * TODO debug why Dell 2413 doesn't like
+ * two link trainings
+ */
- /* HDMI-DVI Dongle */
- if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A &&
- !sink->edid_caps.edid_hdmi)
- sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
+ /* deal with non-mst cases */
+ dp_hbr_verify_link_cap(link, &link->reported_link_cap);
+ }
+
+ /* HDMI-DVI Dongle */
+ if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A &&
+ !sink->edid_caps.edid_hdmi)
+ sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
+ }
/* Connectivity log: detection */
for (i = 0; i < sink->dc_edid.length / EDID_BLOCK_SIZE; i++) {
@@ -761,10 +817,14 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
sink_caps.signal = SIGNAL_TYPE_NONE;
}
- LINK_INFO("link=%d, dc_sink_in=%p is now %s\n",
+ LINK_INFO("link=%d, dc_sink_in=%p is now %s prev_sink=%p dpcd same=%d edid same=%d\n",
link->link_index, sink,
(sink_caps.signal == SIGNAL_TYPE_NONE ?
- "Disconnected":"Connected"));
+ "Disconnected":"Connected"), prev_sink,
+ same_dpcd, same_edid);
+
+ if (prev_sink != NULL)
+ dc_sink_release(prev_sink);
return true;
}
@@ -927,6 +987,7 @@ static bool construct(
struct integrated_info info = {{{ 0 }}};
struct dc_bios *bios = init_params->dc->ctx->dc_bios;
const struct dc_vbios_funcs *bp_funcs = bios->funcs;
+ DC_LOGGER_INIT(dc_ctx->logger);
link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
link->irq_source_hpd_rx = DC_IRQ_SOURCE_INVALID;
@@ -1135,7 +1196,8 @@ static void dpcd_configure_panel_mode(
{
union dpcd_edp_config edp_config_set;
bool panel_mode_edp = false;
- struct dc_context *dc_ctx = link->ctx;
+ DC_LOGGER_INIT(link->ctx->logger);
+
memset(&edp_config_set, '\0', sizeof(union dpcd_edp_config));
if (DP_PANEL_MODE_DEFAULT != panel_mode) {
@@ -1183,16 +1245,21 @@ static void enable_stream_features(struct pipe_ctx *pipe_ctx)
{
struct dc_stream_state *stream = pipe_ctx->stream;
struct dc_link *link = stream->sink->link;
- union down_spread_ctrl downspread;
+ union down_spread_ctrl old_downspread;
+ union down_spread_ctrl new_downspread;
core_link_read_dpcd(link, DP_DOWNSPREAD_CTRL,
- &downspread.raw, sizeof(downspread));
+ &old_downspread.raw, sizeof(old_downspread));
- downspread.bits.IGNORE_MSA_TIMING_PARAM =
+ new_downspread.raw = old_downspread.raw;
+
+ new_downspread.bits.IGNORE_MSA_TIMING_PARAM =
(stream->ignore_msa_timing_param) ? 1 : 0;
- core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL,
- &downspread.raw, sizeof(downspread));
+ if (new_downspread.raw != old_downspread.raw) {
+ core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL,
+ &new_downspread.raw, sizeof(new_downspread));
+ }
}
static enum dc_status enable_link_dp(
@@ -1843,9 +1910,22 @@ static void disable_link(struct dc_link *link, enum signal_type signal)
static bool dp_active_dongle_validate_timing(
const struct dc_crtc_timing *timing,
- const struct dc_dongle_caps *dongle_caps)
+ const struct dpcd_caps *dpcd_caps)
{
unsigned int required_pix_clk = timing->pix_clk_khz;
+ const struct dc_dongle_caps *dongle_caps = &dpcd_caps->dongle_caps;
+
+ switch (dpcd_caps->dongle_type) {
+ case DISPLAY_DONGLE_DP_VGA_CONVERTER:
+ case DISPLAY_DONGLE_DP_DVI_CONVERTER:
+ case DISPLAY_DONGLE_DP_DVI_DONGLE:
+ if (timing->pixel_encoding == PIXEL_ENCODING_RGB)
+ return true;
+ else
+ return false;
+ default:
+ break;
+ }
if (dongle_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER ||
dongle_caps->extendedCapValid == false)
@@ -1911,7 +1991,7 @@ enum dc_status dc_link_validate_mode_timing(
const struct dc_crtc_timing *timing)
{
uint32_t max_pix_clk = stream->sink->dongle_max_pix_clk;
- struct dc_dongle_caps *dongle_caps = &link->dpcd_caps.dongle_caps;
+ struct dpcd_caps *dpcd_caps = &link->dpcd_caps;
/* A hack to avoid failing any modes for EDID override feature on
* topology change such as lower quality cable for DP or different dongle
@@ -1924,7 +2004,7 @@ enum dc_status dc_link_validate_mode_timing(
return DC_EXCEED_DONGLE_CAP;
/* Active Dongle*/
- if (!dp_active_dongle_validate_timing(timing, dongle_caps))
+ if (!dp_active_dongle_validate_timing(timing, dpcd_caps))
return DC_EXCEED_DONGLE_CAP;
switch (stream->signal) {
@@ -1950,10 +2030,10 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level,
struct dc *core_dc = link->ctx->dc;
struct abm *abm = core_dc->res_pool->abm;
struct dmcu *dmcu = core_dc->res_pool->dmcu;
- struct dc_context *dc_ctx = link->ctx;
unsigned int controller_id = 0;
bool use_smooth_brightness = true;
int i;
+ DC_LOGGER_INIT(link->ctx->logger);
if ((dmcu == NULL) ||
(abm == NULL) ||
@@ -1961,7 +2041,7 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level,
return false;
if (stream) {
- if (stream->bl_pwm_level == 0)
+ if (stream->bl_pwm_level == EDP_BACKLIGHT_RAMP_DISABLE_LEVEL)
frame_ramp = 0;
((struct dc_stream_state *)stream)->bl_pwm_level = level;
@@ -2038,10 +2118,10 @@ static struct fixed31_32 get_pbn_per_slot(struct dc_stream_state *stream)
&stream->sink->link->cur_link_settings;
uint32_t link_rate_in_mbps =
link_settings->link_rate * LINK_RATE_REF_FREQ_IN_MHZ;
- struct fixed31_32 mbps = dal_fixed31_32_from_int(
+ struct fixed31_32 mbps = dc_fixpt_from_int(
link_rate_in_mbps * link_settings->lane_count);
- return dal_fixed31_32_div_int(mbps, 54);
+ return dc_fixpt_div_int(mbps, 54);
}
static int get_color_depth(enum dc_color_depth color_depth)
@@ -2082,7 +2162,7 @@ static struct fixed31_32 get_pbn_from_timing(struct pipe_ctx *pipe_ctx)
numerator = 64 * PEAK_FACTOR_X1000;
denominator = 54 * 8 * 1000 * 1000;
kbps *= numerator;
- peak_kbps = dal_fixed31_32_from_fraction(kbps, denominator);
+ peak_kbps = dc_fixpt_from_fraction(kbps, denominator);
return peak_kbps;
}
@@ -2149,8 +2229,8 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx)
struct fixed31_32 avg_time_slots_per_mtp;
struct fixed31_32 pbn;
struct fixed31_32 pbn_per_slot;
- struct dc_context *dc_ctx = link->ctx;
uint8_t i;
+ DC_LOGGER_INIT(link->ctx->logger);
/* enable_link_dp_mst already check link->enabled_stream_count
* and stream is in link->stream[]. This is called during set mode,
@@ -2178,11 +2258,11 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx)
link->mst_stream_alloc_table.stream_count);
for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
- DC_LOG_MST("stream_enc[%d]: 0x%x "
+ DC_LOG_MST("stream_enc[%d]: %p "
"stream[%d].vcp_id: %d "
"stream[%d].slot_count: %d\n",
i,
- link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
+ (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
i,
link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
i,
@@ -2209,7 +2289,7 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx)
/* slot X.Y for only current stream */
pbn_per_slot = get_pbn_per_slot(stream);
pbn = get_pbn_from_timing(pipe_ctx);
- avg_time_slots_per_mtp = dal_fixed31_32_div(pbn, pbn_per_slot);
+ avg_time_slots_per_mtp = dc_fixpt_div(pbn, pbn_per_slot);
stream_encoder->funcs->set_mst_bandwidth(
stream_encoder,
@@ -2226,10 +2306,10 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
struct link_encoder *link_encoder = link->link_enc;
struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc;
struct dp_mst_stream_allocation_table proposed_table = {0};
- struct fixed31_32 avg_time_slots_per_mtp = dal_fixed31_32_from_int(0);
+ struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0);
uint8_t i;
bool mst_mode = (link->type == dc_connection_mst_branch);
- struct dc_context *dc_ctx = link->ctx;
+ DC_LOGGER_INIT(link->ctx->logger);
/* deallocate_mst_payload is called before disable link. When mode or
* disable/enable monitor, new stream is created which is not in link
@@ -2268,11 +2348,11 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
link->mst_stream_alloc_table.stream_count);
for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
- DC_LOG_MST("stream_enc[%d]: 0x%x "
+ DC_LOG_MST("stream_enc[%d]: %p "
"stream[%d].vcp_id: %d "
"stream[%d].slot_count: %d\n",
i,
- link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
+ (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
i,
link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
i,
@@ -2302,8 +2382,8 @@ void core_link_enable_stream(
struct pipe_ctx *pipe_ctx)
{
struct dc *core_dc = pipe_ctx->stream->ctx->dc;
- struct dc_context *dc_ctx = pipe_ctx->stream->ctx;
enum dc_status status;
+ DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
/* eDP lit up by bios already, no need to enable again. */
if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP &&
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 3b5053570229..7d609c71394b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -1378,8 +1378,8 @@ static uint32_t bandwidth_in_kbps_from_timing(
{
uint32_t bits_per_channel = 0;
uint32_t kbps;
- switch (timing->display_color_depth) {
+ switch (timing->display_color_depth) {
case COLOR_DEPTH_666:
bits_per_channel = 6;
break;
@@ -1401,14 +1401,20 @@ static uint32_t bandwidth_in_kbps_from_timing(
default:
break;
}
+
ASSERT(bits_per_channel != 0);
kbps = timing->pix_clk_khz;
kbps *= bits_per_channel;
- if (timing->flags.Y_ONLY != 1)
+ if (timing->flags.Y_ONLY != 1) {
/*Only YOnly make reduce bandwidth by 1/3 compares to RGB*/
kbps *= 3;
+ if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
+ kbps /= 2;
+ else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+ kbps = kbps * 2 / 3;
+ }
return kbps;
@@ -2278,6 +2284,8 @@ static bool retrieve_link_cap(struct dc_link *link)
union edp_configuration_cap edp_config_cap;
union dp_downstream_port_present ds_port = { 0 };
enum dc_status status = DC_ERROR_UNEXPECTED;
+ uint32_t read_dpcd_retry_cnt = 3;
+ int i;
memset(dpcd_data, '\0', sizeof(dpcd_data));
memset(&down_strm_port_count,
@@ -2285,11 +2293,15 @@ static bool retrieve_link_cap(struct dc_link *link)
memset(&edp_config_cap, '\0',
sizeof(union edp_configuration_cap));
- status = core_link_read_dpcd(
- link,
- DP_DPCD_REV,
- dpcd_data,
- sizeof(dpcd_data));
+ for (i = 0; i < read_dpcd_retry_cnt; i++) {
+ status = core_link_read_dpcd(
+ link,
+ DP_DPCD_REV,
+ dpcd_data,
+ sizeof(dpcd_data));
+ if (status == DC_OK)
+ break;
+ }
if (status != DC_OK) {
dm_error("%s: Read dpcd data failed.\n", __func__);
@@ -2376,6 +2388,10 @@ bool detect_dp_sink_caps(struct dc_link *link)
void detect_edp_sink_caps(struct dc_link *link)
{
retrieve_link_cap(link);
+
+ if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN)
+ link->reported_link_cap.link_rate = LINK_RATE_HIGH2;
+
link->verified_link_cap = link->reported_link_cap;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
index 7c866a7d5e77..82cd1d6e6e59 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
@@ -11,8 +11,6 @@
#include "dc_link_dp.h"
#include "dc_link_ddc.h"
#include "dm_helpers.h"
-#include "dce/dce_link_encoder.h"
-#include "dce/dce_stream_encoder.h"
#include "dpcd_defs.h"
enum dc_status core_link_read_dpcd(
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index ba3487e97361..751f3ac9d921 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -45,8 +45,9 @@
#include "dcn10/dcn10_resource.h"
#endif
#include "dce120/dce120_resource.h"
-#define DC_LOGGER \
- ctx->logger
+
+#define DC_LOGGER_INIT(logger)
+
enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
{
enum dce_version dc_version = DCE_VERSION_UNKNOWN;
@@ -78,6 +79,8 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev)) {
dc_version = DCE_VERSION_11_2;
}
+ if (ASIC_REV_IS_VEGAM(asic_id.hw_internal_rev))
+ dc_version = DCE_VERSION_11_22;
break;
case FAMILY_AI:
dc_version = DCE_VERSION_12_0;
@@ -124,6 +127,7 @@ struct resource_pool *dc_create_resource_pool(
num_virtual_links, dc, asic_id);
break;
case DCE_VERSION_11_2:
+ case DCE_VERSION_11_22:
res_pool = dce112_create_resource_pool(
num_virtual_links, dc);
break;
@@ -492,9 +496,9 @@ static void calculate_viewport(struct pipe_ctx *pipe_ctx)
data->viewport_c.x = data->viewport.x / vpc_div;
data->viewport_c.y = data->viewport.y / vpc_div;
data->inits.h_c = (data->viewport.x % vpc_div) != 0 ?
- dal_fixed31_32_half : dal_fixed31_32_zero;
+ dc_fixpt_half : dc_fixpt_zero;
data->inits.v_c = (data->viewport.y % vpc_div) != 0 ?
- dal_fixed31_32_half : dal_fixed31_32_zero;
+ dc_fixpt_half : dc_fixpt_zero;
/* Round up, assume original video size always even dimensions */
data->viewport_c.width = (data->viewport.width + vpc_div - 1) / vpc_div;
data->viewport_c.height = (data->viewport.height + vpc_div - 1) / vpc_div;
@@ -623,10 +627,10 @@ static void calculate_scaling_ratios(struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270)
rect_swap_helper(&surf_src);
- pipe_ctx->plane_res.scl_data.ratios.horz = dal_fixed31_32_from_fraction(
+ pipe_ctx->plane_res.scl_data.ratios.horz = dc_fixpt_from_fraction(
surf_src.width,
plane_state->dst_rect.width);
- pipe_ctx->plane_res.scl_data.ratios.vert = dal_fixed31_32_from_fraction(
+ pipe_ctx->plane_res.scl_data.ratios.vert = dc_fixpt_from_fraction(
surf_src.height,
plane_state->dst_rect.height);
@@ -648,6 +652,14 @@ static void calculate_scaling_ratios(struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_res.scl_data.ratios.horz_c.value /= 2;
pipe_ctx->plane_res.scl_data.ratios.vert_c.value /= 2;
}
+ pipe_ctx->plane_res.scl_data.ratios.horz = dc_fixpt_truncate(
+ pipe_ctx->plane_res.scl_data.ratios.horz, 19);
+ pipe_ctx->plane_res.scl_data.ratios.vert = dc_fixpt_truncate(
+ pipe_ctx->plane_res.scl_data.ratios.vert, 19);
+ pipe_ctx->plane_res.scl_data.ratios.horz_c = dc_fixpt_truncate(
+ pipe_ctx->plane_res.scl_data.ratios.horz_c, 19);
+ pipe_ctx->plane_res.scl_data.ratios.vert_c = dc_fixpt_truncate(
+ pipe_ctx->plane_res.scl_data.ratios.vert_c, 19);
}
static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *recout_skip)
@@ -684,32 +696,33 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
* init_bot = init + scaling_ratio
* init_c = init + truncated_vp_c_offset(from calculate viewport)
*/
- data->inits.h = dal_fixed31_32_div_int(
- dal_fixed31_32_add_int(data->ratios.horz, data->taps.h_taps + 1), 2);
+ data->inits.h = dc_fixpt_truncate(dc_fixpt_div_int(
+ dc_fixpt_add_int(data->ratios.horz, data->taps.h_taps + 1), 2), 19);
- data->inits.h_c = dal_fixed31_32_add(data->inits.h_c, dal_fixed31_32_div_int(
- dal_fixed31_32_add_int(data->ratios.horz_c, data->taps.h_taps_c + 1), 2));
+ data->inits.h_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.h_c, dc_fixpt_div_int(
+ dc_fixpt_add_int(data->ratios.horz_c, data->taps.h_taps_c + 1), 2)), 19);
- data->inits.v = dal_fixed31_32_div_int(
- dal_fixed31_32_add_int(data->ratios.vert, data->taps.v_taps + 1), 2);
+ data->inits.v = dc_fixpt_truncate(dc_fixpt_div_int(
+ dc_fixpt_add_int(data->ratios.vert, data->taps.v_taps + 1), 2), 19);
+
+ data->inits.v_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.v_c, dc_fixpt_div_int(
+ dc_fixpt_add_int(data->ratios.vert_c, data->taps.v_taps_c + 1), 2)), 19);
- data->inits.v_c = dal_fixed31_32_add(data->inits.v_c, dal_fixed31_32_div_int(
- dal_fixed31_32_add_int(data->ratios.vert_c, data->taps.v_taps_c + 1), 2));
/* Adjust for viewport end clip-off */
if ((data->viewport.x + data->viewport.width) < (src.x + src.width) && !flip_horz_scan_dir) {
int vp_clip = src.x + src.width - data->viewport.width - data->viewport.x;
- int int_part = dal_fixed31_32_floor(
- dal_fixed31_32_sub(data->inits.h, data->ratios.horz));
+ int int_part = dc_fixpt_floor(
+ dc_fixpt_sub(data->inits.h, data->ratios.horz));
int_part = int_part > 0 ? int_part : 0;
data->viewport.width += int_part < vp_clip ? int_part : vp_clip;
}
if ((data->viewport.y + data->viewport.height) < (src.y + src.height) && !flip_vert_scan_dir) {
int vp_clip = src.y + src.height - data->viewport.height - data->viewport.y;
- int int_part = dal_fixed31_32_floor(
- dal_fixed31_32_sub(data->inits.v, data->ratios.vert));
+ int int_part = dc_fixpt_floor(
+ dc_fixpt_sub(data->inits.v, data->ratios.vert));
int_part = int_part > 0 ? int_part : 0;
data->viewport.height += int_part < vp_clip ? int_part : vp_clip;
@@ -717,8 +730,8 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
if ((data->viewport_c.x + data->viewport_c.width) < (src.x + src.width) / vpc_div && !flip_horz_scan_dir) {
int vp_clip = (src.x + src.width) / vpc_div -
data->viewport_c.width - data->viewport_c.x;
- int int_part = dal_fixed31_32_floor(
- dal_fixed31_32_sub(data->inits.h_c, data->ratios.horz_c));
+ int int_part = dc_fixpt_floor(
+ dc_fixpt_sub(data->inits.h_c, data->ratios.horz_c));
int_part = int_part > 0 ? int_part : 0;
data->viewport_c.width += int_part < vp_clip ? int_part : vp_clip;
@@ -726,8 +739,8 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
if ((data->viewport_c.y + data->viewport_c.height) < (src.y + src.height) / vpc_div && !flip_vert_scan_dir) {
int vp_clip = (src.y + src.height) / vpc_div -
data->viewport_c.height - data->viewport_c.y;
- int int_part = dal_fixed31_32_floor(
- dal_fixed31_32_sub(data->inits.v_c, data->ratios.vert_c));
+ int int_part = dc_fixpt_floor(
+ dc_fixpt_sub(data->inits.v_c, data->ratios.vert_c));
int_part = int_part > 0 ? int_part : 0;
data->viewport_c.height += int_part < vp_clip ? int_part : vp_clip;
@@ -737,9 +750,9 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
if (data->viewport.x && !flip_horz_scan_dir) {
int int_part;
- data->inits.h = dal_fixed31_32_add(data->inits.h, dal_fixed31_32_mul_int(
+ data->inits.h = dc_fixpt_add(data->inits.h, dc_fixpt_mul_int(
data->ratios.horz, recout_skip->width));
- int_part = dal_fixed31_32_floor(data->inits.h) - data->viewport.x;
+ int_part = dc_fixpt_floor(data->inits.h) - data->viewport.x;
if (int_part < data->taps.h_taps) {
int int_adj = data->viewport.x >= (data->taps.h_taps - int_part) ?
(data->taps.h_taps - int_part) : data->viewport.x;
@@ -752,15 +765,15 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
int_part = data->taps.h_taps;
}
data->inits.h.value &= 0xffffffff;
- data->inits.h = dal_fixed31_32_add_int(data->inits.h, int_part);
+ data->inits.h = dc_fixpt_add_int(data->inits.h, int_part);
}
if (data->viewport_c.x && !flip_horz_scan_dir) {
int int_part;
- data->inits.h_c = dal_fixed31_32_add(data->inits.h_c, dal_fixed31_32_mul_int(
+ data->inits.h_c = dc_fixpt_add(data->inits.h_c, dc_fixpt_mul_int(
data->ratios.horz_c, recout_skip->width));
- int_part = dal_fixed31_32_floor(data->inits.h_c) - data->viewport_c.x;
+ int_part = dc_fixpt_floor(data->inits.h_c) - data->viewport_c.x;
if (int_part < data->taps.h_taps_c) {
int int_adj = data->viewport_c.x >= (data->taps.h_taps_c - int_part) ?
(data->taps.h_taps_c - int_part) : data->viewport_c.x;
@@ -773,15 +786,15 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
int_part = data->taps.h_taps_c;
}
data->inits.h_c.value &= 0xffffffff;
- data->inits.h_c = dal_fixed31_32_add_int(data->inits.h_c, int_part);
+ data->inits.h_c = dc_fixpt_add_int(data->inits.h_c, int_part);
}
if (data->viewport.y && !flip_vert_scan_dir) {
int int_part;
- data->inits.v = dal_fixed31_32_add(data->inits.v, dal_fixed31_32_mul_int(
+ data->inits.v = dc_fixpt_add(data->inits.v, dc_fixpt_mul_int(
data->ratios.vert, recout_skip->height));
- int_part = dal_fixed31_32_floor(data->inits.v) - data->viewport.y;
+ int_part = dc_fixpt_floor(data->inits.v) - data->viewport.y;
if (int_part < data->taps.v_taps) {
int int_adj = data->viewport.y >= (data->taps.v_taps - int_part) ?
(data->taps.v_taps - int_part) : data->viewport.y;
@@ -794,15 +807,15 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
int_part = data->taps.v_taps;
}
data->inits.v.value &= 0xffffffff;
- data->inits.v = dal_fixed31_32_add_int(data->inits.v, int_part);
+ data->inits.v = dc_fixpt_add_int(data->inits.v, int_part);
}
if (data->viewport_c.y && !flip_vert_scan_dir) {
int int_part;
- data->inits.v_c = dal_fixed31_32_add(data->inits.v_c, dal_fixed31_32_mul_int(
+ data->inits.v_c = dc_fixpt_add(data->inits.v_c, dc_fixpt_mul_int(
data->ratios.vert_c, recout_skip->height));
- int_part = dal_fixed31_32_floor(data->inits.v_c) - data->viewport_c.y;
+ int_part = dc_fixpt_floor(data->inits.v_c) - data->viewport_c.y;
if (int_part < data->taps.v_taps_c) {
int int_adj = data->viewport_c.y >= (data->taps.v_taps_c - int_part) ?
(data->taps.v_taps_c - int_part) : data->viewport_c.y;
@@ -815,12 +828,12 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r
int_part = data->taps.v_taps_c;
}
data->inits.v_c.value &= 0xffffffff;
- data->inits.v_c = dal_fixed31_32_add_int(data->inits.v_c, int_part);
+ data->inits.v_c = dc_fixpt_add_int(data->inits.v_c, int_part);
}
/* Interlaced inits based on final vert inits */
- data->inits.v_bot = dal_fixed31_32_add(data->inits.v, data->ratios.vert);
- data->inits.v_c_bot = dal_fixed31_32_add(data->inits.v_c, data->ratios.vert_c);
+ data->inits.v_bot = dc_fixpt_add(data->inits.v, data->ratios.vert);
+ data->inits.v_c_bot = dc_fixpt_add(data->inits.v_c, data->ratios.vert_c);
if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 ||
pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) {
@@ -835,7 +848,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
struct view recout_skip = { 0 };
bool res = false;
- struct dc_context *ctx = pipe_ctx->stream->ctx;
+ DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
/* Important: scaling ratio calculation requires pixel format,
* lb depth calculation requires recout and taps require scaling ratios.
* Inits require viewport, taps, ratios and recout of split pipe
@@ -843,6 +856,9 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface(
pipe_ctx->plane_state->format);
+ if (pipe_ctx->stream->timing.flags.INTERLACE)
+ pipe_ctx->stream->dst.height *= 2;
+
calculate_scaling_ratios(pipe_ctx);
calculate_viewport(pipe_ctx);
@@ -863,6 +879,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right;
pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
+ if (pipe_ctx->stream->timing.flags.INTERLACE)
+ pipe_ctx->plane_res.scl_data.v_active *= 2;
/* Taps calculations */
@@ -908,6 +926,9 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
plane_state->dst_rect.x,
plane_state->dst_rect.y);
+ if (pipe_ctx->stream->timing.flags.INTERLACE)
+ pipe_ctx->stream->dst.height /= 2;
+
return res;
}
@@ -1294,6 +1315,19 @@ bool dc_add_all_planes_for_stream(
}
+static bool is_hdr_static_meta_changed(struct dc_stream_state *cur_stream,
+ struct dc_stream_state *new_stream)
+{
+ if (cur_stream == NULL)
+ return true;
+
+ if (memcmp(&cur_stream->hdr_static_metadata,
+ &new_stream->hdr_static_metadata,
+ sizeof(struct dc_info_packet)) != 0)
+ return true;
+
+ return false;
+}
static bool is_timing_changed(struct dc_stream_state *cur_stream,
struct dc_stream_state *new_stream)
@@ -1329,6 +1363,9 @@ static bool are_stream_backends_same(
if (is_timing_changed(stream_a, stream_b))
return false;
+ if (is_hdr_static_meta_changed(stream_a, stream_b))
+ return false;
+
return true;
}
@@ -1599,18 +1636,6 @@ enum dc_status dc_remove_stream_from_ctx(
return DC_OK;
}
-static void copy_pipe_ctx(
- const struct pipe_ctx *from_pipe_ctx, struct pipe_ctx *to_pipe_ctx)
-{
- struct dc_plane_state *plane_state = to_pipe_ctx->plane_state;
- struct dc_stream_state *stream = to_pipe_ctx->stream;
-
- *to_pipe_ctx = *from_pipe_ctx;
- to_pipe_ctx->stream = stream;
- if (plane_state != NULL)
- to_pipe_ctx->plane_state = plane_state;
-}
-
static struct dc_stream_state *find_pll_sharable_stream(
struct dc_stream_state *stream_needs_pll,
struct dc_state *context)
@@ -1703,7 +1728,7 @@ enum dc_status resource_map_pool_resources(
pipe_idx = acquire_first_split_pipe(&context->res_ctx, pool, stream);
#endif
- if (pipe_idx < 0)
+ if (pipe_idx < 0 || context->res_ctx.pipe_ctx[pipe_idx].stream_res.tg == NULL)
return DC_NO_CONTROLLER_RESOURCE;
pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
@@ -1752,26 +1777,6 @@ enum dc_status resource_map_pool_resources(
return DC_ERROR_UNEXPECTED;
}
-/* first stream in the context is used to populate the rest */
-void validate_guaranteed_copy_streams(
- struct dc_state *context,
- int max_streams)
-{
- int i;
-
- for (i = 1; i < max_streams; i++) {
- context->streams[i] = context->streams[0];
-
- copy_pipe_ctx(&context->res_ctx.pipe_ctx[0],
- &context->res_ctx.pipe_ctx[i]);
- context->res_ctx.pipe_ctx[i].stream =
- context->res_ctx.pipe_ctx[0].stream;
-
- dc_stream_retain(context->streams[i]);
- context->stream_count++;
- }
-}
-
void dc_resource_state_copy_construct_current(
const struct dc *dc,
struct dc_state *dst_ctx)
@@ -1798,9 +1803,9 @@ enum dc_status dc_validate_global_state(
return DC_ERROR_UNEXPECTED;
if (dc->res_pool->funcs->validate_global) {
- result = dc->res_pool->funcs->validate_global(dc, new_ctx);
- if (result != DC_OK)
- return result;
+ result = dc->res_pool->funcs->validate_global(dc, new_ctx);
+ if (result != DC_OK)
+ return result;
}
for (i = 0; i < new_ctx->stream_count; i++) {
@@ -1843,7 +1848,7 @@ enum dc_status dc_validate_global_state(
}
static void patch_gamut_packet_checksum(
- struct encoder_info_packet *gamut_packet)
+ struct dc_info_packet *gamut_packet)
{
/* For gamut we recalc checksum */
if (gamut_packet->valid) {
@@ -1862,12 +1867,11 @@ static void patch_gamut_packet_checksum(
}
static void set_avi_info_frame(
- struct encoder_info_packet *info_packet,
+ struct dc_info_packet *info_packet,
struct pipe_ctx *pipe_ctx)
{
struct dc_stream_state *stream = pipe_ctx->stream;
enum dc_color_space color_space = COLOR_SPACE_UNKNOWN;
- struct info_frame info_frame = { {0} };
uint32_t pixel_encoding = 0;
enum scanning_type scan_type = SCANNING_TYPE_NODATA;
enum dc_aspect_ratio aspect = ASPECT_RATIO_NO_DATA;
@@ -1877,22 +1881,24 @@ static void set_avi_info_frame(
unsigned int cn0_cn1_value = 0;
uint8_t *check_sum = NULL;
uint8_t byte_index = 0;
- union hdmi_info_packet *hdmi_info = &info_frame.avi_info_packet.info_packet_hdmi;
+ union hdmi_info_packet hdmi_info;
union display_content_support support = {0};
unsigned int vic = pipe_ctx->stream->timing.vic;
enum dc_timing_3d_format format;
+ memset(&hdmi_info, 0, sizeof(union hdmi_info_packet));
+
color_space = pipe_ctx->stream->output_color_space;
if (color_space == COLOR_SPACE_UNKNOWN)
color_space = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ?
COLOR_SPACE_SRGB:COLOR_SPACE_YCBCR709;
/* Initialize header */
- hdmi_info->bits.header.info_frame_type = HDMI_INFOFRAME_TYPE_AVI;
+ hdmi_info.bits.header.info_frame_type = HDMI_INFOFRAME_TYPE_AVI;
/* InfoFrameVersion_3 is defined by CEA861F (Section 6.4), but shall
* not be used in HDMI 2.0 (Section 10.1) */
- hdmi_info->bits.header.version = 2;
- hdmi_info->bits.header.length = HDMI_AVI_INFOFRAME_SIZE;
+ hdmi_info.bits.header.version = 2;
+ hdmi_info.bits.header.length = HDMI_AVI_INFOFRAME_SIZE;
/*
* IDO-defined (Y2,Y1,Y0 = 1,1,1) shall not be used by devices built
@@ -1918,39 +1924,39 @@ static void set_avi_info_frame(
/* Y0_Y1_Y2 : The pixel encoding */
/* H14b AVI InfoFrame has extension on Y-field from 2 bits to 3 bits */
- hdmi_info->bits.Y0_Y1_Y2 = pixel_encoding;
+ hdmi_info.bits.Y0_Y1_Y2 = pixel_encoding;
/* A0 = 1 Active Format Information valid */
- hdmi_info->bits.A0 = ACTIVE_FORMAT_VALID;
+ hdmi_info.bits.A0 = ACTIVE_FORMAT_VALID;
/* B0, B1 = 3; Bar info data is valid */
- hdmi_info->bits.B0_B1 = BAR_INFO_BOTH_VALID;
+ hdmi_info.bits.B0_B1 = BAR_INFO_BOTH_VALID;
- hdmi_info->bits.SC0_SC1 = PICTURE_SCALING_UNIFORM;
+ hdmi_info.bits.SC0_SC1 = PICTURE_SCALING_UNIFORM;
/* S0, S1 : Underscan / Overscan */
/* TODO: un-hardcode scan type */
scan_type = SCANNING_TYPE_UNDERSCAN;
- hdmi_info->bits.S0_S1 = scan_type;
+ hdmi_info.bits.S0_S1 = scan_type;
/* C0, C1 : Colorimetry */
if (color_space == COLOR_SPACE_YCBCR709 ||
color_space == COLOR_SPACE_YCBCR709_LIMITED)
- hdmi_info->bits.C0_C1 = COLORIMETRY_ITU709;
+ hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709;
else if (color_space == COLOR_SPACE_YCBCR601 ||
color_space == COLOR_SPACE_YCBCR601_LIMITED)
- hdmi_info->bits.C0_C1 = COLORIMETRY_ITU601;
+ hdmi_info.bits.C0_C1 = COLORIMETRY_ITU601;
else {
- hdmi_info->bits.C0_C1 = COLORIMETRY_NO_DATA;
+ hdmi_info.bits.C0_C1 = COLORIMETRY_NO_DATA;
}
if (color_space == COLOR_SPACE_2020_RGB_FULLRANGE ||
color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE ||
color_space == COLOR_SPACE_2020_YCBCR) {
- hdmi_info->bits.EC0_EC2 = COLORIMETRYEX_BT2020RGBYCBCR;
- hdmi_info->bits.C0_C1 = COLORIMETRY_EXTENDED;
+ hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_BT2020RGBYCBCR;
+ hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED;
} else if (color_space == COLOR_SPACE_ADOBERGB) {
- hdmi_info->bits.EC0_EC2 = COLORIMETRYEX_ADOBERGB;
- hdmi_info->bits.C0_C1 = COLORIMETRY_EXTENDED;
+ hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_ADOBERGB;
+ hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED;
}
/* TODO: un-hardcode aspect ratio */
@@ -1959,18 +1965,18 @@ static void set_avi_info_frame(
switch (aspect) {
case ASPECT_RATIO_4_3:
case ASPECT_RATIO_16_9:
- hdmi_info->bits.M0_M1 = aspect;
+ hdmi_info.bits.M0_M1 = aspect;
break;
case ASPECT_RATIO_NO_DATA:
case ASPECT_RATIO_64_27:
case ASPECT_RATIO_256_135:
default:
- hdmi_info->bits.M0_M1 = 0;
+ hdmi_info.bits.M0_M1 = 0;
}
/* Active Format Aspect ratio - same as Picture Aspect Ratio. */
- hdmi_info->bits.R0_R3 = ACTIVE_FORMAT_ASPECT_RATIO_SAME_AS_PICTURE;
+ hdmi_info.bits.R0_R3 = ACTIVE_FORMAT_ASPECT_RATIO_SAME_AS_PICTURE;
/* TODO: un-hardcode cn0_cn1 and itc */
@@ -2013,8 +2019,8 @@ static void set_avi_info_frame(
}
}
}
- hdmi_info->bits.CN0_CN1 = cn0_cn1_value;
- hdmi_info->bits.ITC = itc_value;
+ hdmi_info.bits.CN0_CN1 = cn0_cn1_value;
+ hdmi_info.bits.ITC = itc_value;
}
/* TODO : We should handle YCC quantization */
@@ -2023,19 +2029,19 @@ static void set_avi_info_frame(
stream->sink->edid_caps.qy_bit == 1) {
if (color_space == COLOR_SPACE_SRGB ||
color_space == COLOR_SPACE_2020_RGB_FULLRANGE) {
- hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_FULL_RANGE;
- hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_FULL_RANGE;
+ hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_FULL_RANGE;
+ hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_FULL_RANGE;
} else if (color_space == COLOR_SPACE_SRGB_LIMITED ||
color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE) {
- hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_LIMITED_RANGE;
- hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
+ hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_LIMITED_RANGE;
+ hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
} else {
- hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE;
- hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
+ hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE;
+ hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
}
} else {
- hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE;
- hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
+ hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE;
+ hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
}
///VIC
@@ -2060,51 +2066,49 @@ static void set_avi_info_frame(
break;
}
}
- hdmi_info->bits.VIC0_VIC7 = vic;
+ hdmi_info.bits.VIC0_VIC7 = vic;
/* pixel repetition
* PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel
* repetition start from 1 */
- hdmi_info->bits.PR0_PR3 = 0;
+ hdmi_info.bits.PR0_PR3 = 0;
/* Bar Info
* barTop: Line Number of End of Top Bar.
* barBottom: Line Number of Start of Bottom Bar.
* barLeft: Pixel Number of End of Left Bar.
* barRight: Pixel Number of Start of Right Bar. */
- hdmi_info->bits.bar_top = stream->timing.v_border_top;
- hdmi_info->bits.bar_bottom = (stream->timing.v_total
+ hdmi_info.bits.bar_top = stream->timing.v_border_top;
+ hdmi_info.bits.bar_bottom = (stream->timing.v_total
- stream->timing.v_border_bottom + 1);
- hdmi_info->bits.bar_left = stream->timing.h_border_left;
- hdmi_info->bits.bar_right = (stream->timing.h_total
+ hdmi_info.bits.bar_left = stream->timing.h_border_left;
+ hdmi_info.bits.bar_right = (stream->timing.h_total
- stream->timing.h_border_right + 1);
/* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */
- check_sum = &info_frame.avi_info_packet.info_packet_hdmi.packet_raw_data.sb[0];
+ check_sum = &hdmi_info.packet_raw_data.sb[0];
*check_sum = HDMI_INFOFRAME_TYPE_AVI + HDMI_AVI_INFOFRAME_SIZE + 2;
for (byte_index = 1; byte_index <= HDMI_AVI_INFOFRAME_SIZE; byte_index++)
- *check_sum += hdmi_info->packet_raw_data.sb[byte_index];
+ *check_sum += hdmi_info.packet_raw_data.sb[byte_index];
/* one byte complement */
*check_sum = (uint8_t) (0x100 - *check_sum);
/* Store in hw_path_mode */
- info_packet->hb0 = hdmi_info->packet_raw_data.hb0;
- info_packet->hb1 = hdmi_info->packet_raw_data.hb1;
- info_packet->hb2 = hdmi_info->packet_raw_data.hb2;
+ info_packet->hb0 = hdmi_info.packet_raw_data.hb0;
+ info_packet->hb1 = hdmi_info.packet_raw_data.hb1;
+ info_packet->hb2 = hdmi_info.packet_raw_data.hb2;
- for (byte_index = 0; byte_index < sizeof(info_frame.avi_info_packet.
- info_packet_hdmi.packet_raw_data.sb); byte_index++)
- info_packet->sb[byte_index] = info_frame.avi_info_packet.
- info_packet_hdmi.packet_raw_data.sb[byte_index];
+ for (byte_index = 0; byte_index < sizeof(hdmi_info.packet_raw_data.sb); byte_index++)
+ info_packet->sb[byte_index] = hdmi_info.packet_raw_data.sb[byte_index];
info_packet->valid = true;
}
static void set_vendor_info_packet(
- struct encoder_info_packet *info_packet,
+ struct dc_info_packet *info_packet,
struct dc_stream_state *stream)
{
uint32_t length = 0;
@@ -2217,7 +2221,7 @@ static void set_vendor_info_packet(
}
static void set_spd_info_packet(
- struct encoder_info_packet *info_packet,
+ struct dc_info_packet *info_packet,
struct dc_stream_state *stream)
{
/* SPD info packet for FreeSync */
@@ -2338,104 +2342,19 @@ static void set_spd_info_packet(
}
static void set_hdr_static_info_packet(
- struct encoder_info_packet *info_packet,
+ struct dc_info_packet *info_packet,
struct dc_stream_state *stream)
{
- uint16_t i = 0;
- enum signal_type signal = stream->signal;
- uint32_t data;
+ /* HDR Static Metadata info packet for HDR10 */
- if (!stream->hdr_static_metadata.hdr_supported)
+ if (!stream->hdr_static_metadata.valid)
return;
- if (dc_is_hdmi_signal(signal)) {
- info_packet->valid = true;
-
- info_packet->hb0 = 0x87;
- info_packet->hb1 = 0x01;
- info_packet->hb2 = 0x1A;
- i = 1;
- } else if (dc_is_dp_signal(signal)) {
- info_packet->valid = true;
-
- info_packet->hb0 = 0x00;
- info_packet->hb1 = 0x87;
- info_packet->hb2 = 0x1D;
- info_packet->hb3 = (0x13 << 2);
- i = 2;
- }
-
- data = stream->hdr_static_metadata.is_hdr;
- info_packet->sb[i++] = data ? 0x02 : 0x00;
- info_packet->sb[i++] = 0x00;
-
- data = stream->hdr_static_metadata.chromaticity_green_x / 2;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- data = stream->hdr_static_metadata.chromaticity_green_y / 2;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- data = stream->hdr_static_metadata.chromaticity_blue_x / 2;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- data = stream->hdr_static_metadata.chromaticity_blue_y / 2;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- data = stream->hdr_static_metadata.chromaticity_red_x / 2;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- data = stream->hdr_static_metadata.chromaticity_red_y / 2;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- data = stream->hdr_static_metadata.chromaticity_white_point_x / 2;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- data = stream->hdr_static_metadata.chromaticity_white_point_y / 2;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- data = stream->hdr_static_metadata.max_luminance;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- data = stream->hdr_static_metadata.min_luminance;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- data = stream->hdr_static_metadata.maximum_content_light_level;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- data = stream->hdr_static_metadata.maximum_frame_average_light_level;
- info_packet->sb[i++] = data & 0xFF;
- info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
- if (dc_is_hdmi_signal(signal)) {
- uint32_t checksum = 0;
-
- checksum += info_packet->hb0;
- checksum += info_packet->hb1;
- checksum += info_packet->hb2;
-
- for (i = 1; i <= info_packet->hb2; i++)
- checksum += info_packet->sb[i];
-
- info_packet->sb[0] = 0x100 - checksum;
- } else if (dc_is_dp_signal(signal)) {
- info_packet->sb[0] = 0x01;
- info_packet->sb[1] = 0x1A;
- }
+ *info_packet = stream->hdr_static_metadata;
}
static void set_vsc_info_packet(
- struct encoder_info_packet *info_packet,
+ struct dc_info_packet *info_packet,
struct dc_stream_state *stream)
{
unsigned int vscPacketRevision = 0;
@@ -2650,6 +2569,8 @@ bool pipe_need_reprogram(
if (is_timing_changed(pipe_ctx_old->stream, pipe_ctx->stream))
return true;
+ if (is_hdr_static_meta_changed(pipe_ctx_old->stream, pipe_ctx->stream))
+ return true;
return false;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index ce0747ed0f00..3732a1de9d6c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -101,14 +101,16 @@ static void construct(struct dc_stream_state *stream,
stream->status.link = stream->sink->link;
update_stream_signal(stream);
+
+ stream->out_transfer_func = dc_create_transfer_func();
+ stream->out_transfer_func->type = TF_TYPE_BYPASS;
}
static void destruct(struct dc_stream_state *stream)
{
dc_sink_release(stream->sink);
if (stream->out_transfer_func != NULL) {
- dc_transfer_func_release(
- stream->out_transfer_func);
+ dc_transfer_func_release(stream->out_transfer_func);
stream->out_transfer_func = NULL;
}
}
@@ -176,6 +178,7 @@ bool dc_stream_set_cursor_attributes(
int i;
struct dc *core_dc;
struct resource_context *res_ctx;
+ struct pipe_ctx *pipe_to_program = NULL;
if (NULL == stream) {
dm_error("DC: dc_stream is NULL!\n");
@@ -203,9 +206,17 @@ bool dc_stream_set_cursor_attributes(
if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
continue;
+ if (!pipe_to_program) {
+ pipe_to_program = pipe_ctx;
+ core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true);
+ }
core_dc->hwss.set_cursor_attribute(pipe_ctx);
}
+
+ if (pipe_to_program)
+ core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, false);
+
return true;
}
@@ -216,6 +227,7 @@ bool dc_stream_set_cursor_position(
int i;
struct dc *core_dc;
struct resource_context *res_ctx;
+ struct pipe_ctx *pipe_to_program = NULL;
if (NULL == stream) {
dm_error("DC: dc_stream is NULL!\n");
@@ -241,9 +253,17 @@ bool dc_stream_set_cursor_position(
!pipe_ctx->plane_res.ipp)
continue;
+ if (!pipe_to_program) {
+ pipe_to_program = pipe_ctx;
+ core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true);
+ }
+
core_dc->hwss.set_cursor_position(pipe_ctx);
}
+ if (pipe_to_program)
+ core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, false);
+
return true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index 132eef3826e2..68a71adeb12e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -38,6 +38,12 @@
static void construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
{
plane_state->ctx = ctx;
+
+ plane_state->gamma_correction = dc_create_gamma();
+ plane_state->gamma_correction->is_identity = true;
+
+ plane_state->in_transfer_func = dc_create_transfer_func();
+ plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
}
static void destruct(struct dc_plane_state *plane_state)
@@ -175,7 +181,7 @@ void dc_transfer_func_release(struct dc_transfer_func *tf)
kref_put(&tf->refcount, dc_transfer_func_free);
}
-struct dc_transfer_func *dc_create_transfer_func(void)
+struct dc_transfer_func *dc_create_transfer_func()
{
struct dc_transfer_func *tf = kvzalloc(sizeof(*tf), GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index fa4b3c8b3bb7..9cfde0ccf4e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -38,7 +38,7 @@
#include "inc/compressor.h"
#include "dml/display_mode_lib.h"
-#define DC_VER "3.1.38"
+#define DC_VER "3.1.44"
#define MAX_SURFACES 3
#define MAX_STREAMS 6
@@ -75,6 +75,7 @@ struct dc_caps {
bool dynamic_audio;
bool is_apu;
bool dual_link_dvi;
+ bool post_blend_color_processing;
};
struct dc_dcc_surface_param {
@@ -202,6 +203,8 @@ struct dc_debug {
bool timing_trace;
bool clock_trace;
bool validation_trace;
+ bool bandwidth_calcs_trace;
+ int max_downscale_src_width;
/* stutter efficiency related */
bool disable_stutter;
@@ -238,6 +241,8 @@ struct dc_debug {
bool az_endpoint_mute_only;
bool always_use_regamma;
bool p010_mpo_support;
+ bool recovery_enabled;
+
};
struct dc_state;
struct resource_pool;
@@ -332,20 +337,6 @@ enum {
TRANSFER_FUNC_POINTS = 1025
};
-// Moved here from color module for linux
-enum color_transfer_func {
- transfer_func_unknown,
- transfer_func_srgb,
- transfer_func_bt709,
- transfer_func_pq2084,
- transfer_func_pq2084_interim,
- transfer_func_linear_0_1,
- transfer_func_linear_0_125,
- transfer_func_dolbyvision,
- transfer_func_gamma_22,
- transfer_func_gamma_26
-};
-
struct dc_hdr_static_metadata {
/* display chromaticities and white point in units of 0.00001 */
unsigned int chromaticity_green_x;
@@ -361,9 +352,6 @@ struct dc_hdr_static_metadata {
uint32_t max_luminance;
uint32_t maximum_content_light_level;
uint32_t maximum_frame_average_light_level;
-
- bool hdr_supported;
- bool is_hdr;
};
enum dc_transfer_func_type {
@@ -419,7 +407,6 @@ union surface_update_flags {
/* Medium updates */
uint32_t dcc_change:1;
uint32_t color_space_change:1;
- uint32_t input_tf_change:1;
uint32_t horizontal_mirror_change:1;
uint32_t per_pixel_alpha_change:1;
uint32_t rotation_change:1;
@@ -428,6 +415,7 @@ union surface_update_flags {
uint32_t position_change:1;
uint32_t in_transfer_func_change:1;
uint32_t input_csc_change:1;
+ uint32_t coeff_reduction_change:1;
uint32_t output_tf_change:1;
uint32_t pixel_format_change:1;
@@ -460,7 +448,7 @@ struct dc_plane_state {
struct dc_gamma *gamma_correction;
struct dc_transfer_func *in_transfer_func;
struct dc_bias_and_scale *bias_and_scale;
- struct csc_transform input_csc_color_matrix;
+ struct dc_csc_transform input_csc_color_matrix;
struct fixed31_32 coeff_reduction_factor;
uint32_t sdr_white_level;
@@ -468,7 +456,6 @@ struct dc_plane_state {
struct dc_hdr_static_metadata hdr_static_ctx;
enum dc_color_space color_space;
- enum color_transfer_func input_tf;
enum surface_pixel_format format;
enum dc_rotation_angle rotation;
@@ -498,7 +485,6 @@ struct dc_plane_info {
enum dc_rotation_angle rotation;
enum plane_stereo_format stereo_format;
enum dc_color_space color_space;
- enum color_transfer_func input_tf;
unsigned int sdr_white_level;
bool horizontal_mirror;
bool visible;
@@ -517,19 +503,18 @@ struct dc_surface_update {
struct dc_plane_state *surface;
/* isr safe update parameters. null means no updates */
- struct dc_flip_addrs *flip_addr;
- struct dc_plane_info *plane_info;
- struct dc_scaling_info *scaling_info;
+ const struct dc_flip_addrs *flip_addr;
+ const struct dc_plane_info *plane_info;
+ const struct dc_scaling_info *scaling_info;
/* following updates require alloc/sleep/spin that is not isr safe,
* null means no updates
*/
- struct dc_gamma *gamma;
- enum color_transfer_func color_input_tf;
- struct dc_transfer_func *in_transfer_func;
+ const struct dc_gamma *gamma;
+ const struct dc_transfer_func *in_transfer_func;
- struct csc_transform *input_csc_color_matrix;
- struct fixed31_32 *coeff_reduction_factor;
+ const struct dc_csc_transform *input_csc_color_matrix;
+ const struct fixed31_32 *coeff_reduction_factor;
};
/*
@@ -699,6 +684,7 @@ struct dc_cursor {
struct dc_cursor_attributes attributes;
};
+
/*******************************************************************************
* Interrupt interfaces
******************************************************************************/
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index 2726b02e006b..90bccd5ccaa2 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -26,6 +26,8 @@
#ifndef DC_DP_TYPES_H
#define DC_DP_TYPES_H
+#include "os_types.h"
+
enum dc_lane_count {
LANE_COUNT_UNKNOWN = 0,
LANE_COUNT_ONE = 1,
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index 48e1fcf53d43..bd0fda0ceb91 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -117,6 +117,65 @@ uint32_t generic_reg_get5(const struct dc_context *ctx, uint32_t addr,
return reg_val;
}
+uint32_t generic_reg_get6(const struct dc_context *ctx, uint32_t addr,
+ uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+ uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+ uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+ uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+ uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+ uint8_t shift6, uint32_t mask6, uint32_t *field_value6)
+{
+ uint32_t reg_val = dm_read_reg(ctx, addr);
+ *field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1);
+ *field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2);
+ *field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3);
+ *field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4);
+ *field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5);
+ *field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6);
+ return reg_val;
+}
+
+uint32_t generic_reg_get7(const struct dc_context *ctx, uint32_t addr,
+ uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+ uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+ uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+ uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+ uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+ uint8_t shift6, uint32_t mask6, uint32_t *field_value6,
+ uint8_t shift7, uint32_t mask7, uint32_t *field_value7)
+{
+ uint32_t reg_val = dm_read_reg(ctx, addr);
+ *field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1);
+ *field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2);
+ *field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3);
+ *field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4);
+ *field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5);
+ *field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6);
+ *field_value7 = get_reg_field_value_ex(reg_val, mask7, shift7);
+ return reg_val;
+}
+
+uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr,
+ uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+ uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+ uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+ uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+ uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+ uint8_t shift6, uint32_t mask6, uint32_t *field_value6,
+ uint8_t shift7, uint32_t mask7, uint32_t *field_value7,
+ uint8_t shift8, uint32_t mask8, uint32_t *field_value8)
+{
+ uint32_t reg_val = dm_read_reg(ctx, addr);
+ *field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1);
+ *field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2);
+ *field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3);
+ *field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4);
+ *field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5);
+ *field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6);
+ *field_value7 = get_reg_field_value_ex(reg_val, mask7, shift7);
+ *field_value8 = get_reg_field_value_ex(reg_val, mask8, shift8);
+ return reg_val;
+}
/* note: va version of this is pretty bad idea, since there is a output parameter pass by pointer
* compiler won't be able to check for size match and is prone to stack corruption type of bugs
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index b83a7dc2f5a9..b1f70579d61b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -423,6 +423,11 @@ enum dc_gamma_type {
GAMMA_CS_TFM_1D = 3,
};
+struct dc_csc_transform {
+ uint16_t matrix[12];
+ bool enable_adjustment;
+};
+
struct dc_gamma {
struct kref refcount;
enum dc_gamma_type type;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index dc34515ef01f..8a716baa1203 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -51,6 +51,14 @@ struct link_mst_stream_allocation_table {
struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM];
};
+struct time_stamp {
+ uint64_t edp_poweroff;
+ uint64_t edp_poweron;
+};
+
+struct link_trace {
+ struct time_stamp time_stamp;
+};
/*
* A link contains one or more sinks and their connected status.
* The currently active signal type (HDMI, DP-SST, DP-MST) is also reported.
@@ -114,6 +122,7 @@ struct dc_link {
struct dc_link_status link_status;
+ struct link_trace link_trace;
};
const struct dc_link_status *dc_link_get_status(const struct dc_link *dc_link);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index d017df56b2ba..d7e6d53bb383 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -58,18 +58,20 @@ struct dc_stream_state {
struct freesync_context freesync_ctx;
- struct dc_hdr_static_metadata hdr_static_metadata;
+ struct dc_info_packet hdr_static_metadata;
struct dc_transfer_func *out_transfer_func;
struct colorspace_transform gamut_remap_matrix;
- struct csc_transform csc_color_matrix;
+ struct dc_csc_transform csc_color_matrix;
enum dc_color_space output_color_space;
enum dc_dither_option dither_option;
enum view_3d_format view_format;
- enum color_transfer_func output_tf;
bool ignore_msa_timing_param;
+
+ unsigned long long periodic_fn_vsync_delta;
+
/* TODO: custom INFO packets */
/* TODO: ABM info (DMCU) */
/* PSR info */
@@ -110,9 +112,10 @@ struct dc_stream_update {
struct rect src;
struct rect dst;
struct dc_transfer_func *out_transfer_func;
- struct dc_hdr_static_metadata *hdr_static_metadata;
- enum color_transfer_func color_output_tf;
+ struct dc_info_packet *hdr_static_metadata;
unsigned int *abm_level;
+
+ unsigned long long *periodic_fn_vsync_delta;
};
bool dc_is_stream_unchanged(
@@ -131,13 +134,6 @@ bool dc_is_stream_scaling_unchanged(
* This does not trigger a flip. No surface address is programmed.
*/
-bool dc_commit_planes_to_stream(
- struct dc *dc,
- struct dc_plane_state **plane_states,
- uint8_t new_plane_count,
- struct dc_stream_state *dc_stream,
- struct dc_state *state);
-
void dc_commit_updates_for_stream(struct dc *dc,
struct dc_surface_update *srf_updates,
int surface_count,
@@ -209,14 +205,6 @@ bool dc_add_all_planes_for_stream(
enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream);
/*
- * This function takes a stream and checks if it is guaranteed to be supported.
- * Guaranteed means that MAX_COFUNC similar streams are supported.
- *
- * After this call:
- * No hardware is programmed for call. Only validation is done.
- */
-
-/*
* Set up streams and links associated to drive sinks
* The streams parameter is an absolute set of all active streams.
*
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 9441305d3ab5..76df2534c4a4 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -25,7 +25,7 @@
#ifndef DC_TYPES_H_
#define DC_TYPES_H_
-#include "fixed32_32.h"
+#include "os_types.h"
#include "fixed31_32.h"
#include "irq_types.h"
#include "dc_dp_types.h"
@@ -370,12 +370,6 @@ struct dc_csc_adjustments {
struct fixed31_32 hue;
};
-enum {
- MAX_LANES = 2,
- MAX_COFUNC_PATH = 6,
- LAYER_INDEX_PRIMARY = -1,
-};
-
enum dpcd_downstream_port_max_bpc {
DOWN_STREAM_MAX_8BPC = 0,
DOWN_STREAM_MAX_10BPC,
@@ -530,6 +524,15 @@ struct vrr_params {
uint32_t frame_counter;
};
+struct dc_info_packet {
+ bool valid;
+ uint8_t hb0;
+ uint8_t hb1;
+ uint8_t hb2;
+ uint8_t hb3;
+ uint8_t sb[32];
+};
+
#define DC_PLANE_UPDATE_TIMES_MAX 10
struct dc_plane_flip_time {
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index fe92a1222803..29294db1a96b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -26,7 +26,7 @@
#include "dce_abm.h"
#include "dm_services.h"
#include "reg_helper.h"
-#include "fixed32_32.h"
+#include "fixed31_32.h"
#include "dc.h"
#include "atom.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
index 6d5cdcdc8ec9..7f6d724686f1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
@@ -33,8 +33,9 @@
#define CTX \
aud->base.ctx
-#define DC_LOGGER \
- aud->base.ctx->logger
+
+#define DC_LOGGER_INIT()
+
#define REG(reg)\
(aud->regs->reg)
@@ -348,8 +349,8 @@ static void set_audio_latency(
void dce_aud_az_enable(struct audio *audio)
{
- struct dce_audio *aud = DCE_AUD(audio);
uint32_t value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL);
+ DC_LOGGER_INIT();
set_reg_field_value(value, 1,
AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
@@ -371,7 +372,7 @@ void dce_aud_az_enable(struct audio *audio)
void dce_aud_az_disable(struct audio *audio)
{
uint32_t value;
- struct dce_audio *aud = DCE_AUD(audio);
+ DC_LOGGER_INIT();
value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL);
set_reg_field_value(value, 1,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 0aa2cda60890..599c7ab6befe 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -41,8 +41,9 @@
#define CTX \
clk_src->base.ctx
-#define DC_LOGGER \
- calc_pll_cs->ctx->logger
+
+#define DC_LOGGER_INIT()
+
#undef FN
#define FN(reg_name, field_name) \
clk_src->cs_shift->field_name, clk_src->cs_mask->field_name
@@ -467,7 +468,7 @@ static uint32_t dce110_get_pix_clk_dividers_helper (
{
uint32_t field = 0;
uint32_t pll_calc_error = MAX_PLL_CALC_ERROR;
- struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll;
+ DC_LOGGER_INIT();
/* Check if reference clock is external (not pcie/xtalin)
* HW Dce80 spec:
* 00 - PCIE_REFCLK, 01 - XTALIN, 02 - GENERICA, 03 - GENERICB
@@ -557,8 +558,8 @@ static uint32_t dce110_get_pix_clk_dividers(
struct pll_settings *pll_settings)
{
struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(cs);
- struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll;
uint32_t pll_calc_error = MAX_PLL_CALC_ERROR;
+ DC_LOGGER_INIT();
if (pix_clk_params == NULL || pll_settings == NULL
|| pix_clk_params->requested_pix_clk == 0) {
@@ -589,6 +590,7 @@ static uint32_t dce110_get_pix_clk_dividers(
pll_settings, pix_clk_params);
break;
case DCE_VERSION_11_2:
+ case DCE_VERSION_11_22:
case DCE_VERSION_12_0:
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
case DCN_VERSION_1_0:
@@ -655,12 +657,12 @@ static uint32_t dce110_get_d_to_pixel_rate_in_hz(
return 0;
}
- pix_rate = dal_fixed31_32_from_int(clk_src->ref_freq_khz);
- pix_rate = dal_fixed31_32_mul_int(pix_rate, 1000);
- pix_rate = dal_fixed31_32_mul_int(pix_rate, phase);
- pix_rate = dal_fixed31_32_div_int(pix_rate, modulo);
+ pix_rate = dc_fixpt_from_int(clk_src->ref_freq_khz);
+ pix_rate = dc_fixpt_mul_int(pix_rate, 1000);
+ pix_rate = dc_fixpt_mul_int(pix_rate, phase);
+ pix_rate = dc_fixpt_div_int(pix_rate, modulo);
- return dal_fixed31_32_round(pix_rate);
+ return dc_fixpt_round(pix_rate);
} else {
return dce110_get_dp_pixel_rate_from_combo_phy_pll(cs, pix_clk_params, pll_settings);
}
@@ -709,12 +711,12 @@ static bool calculate_ss(
const struct spread_spectrum_data *ss_data,
struct delta_sigma_data *ds_data)
{
- struct fixed32_32 fb_div;
- struct fixed32_32 ss_amount;
- struct fixed32_32 ss_nslip_amount;
- struct fixed32_32 ss_ds_frac_amount;
- struct fixed32_32 ss_step_size;
- struct fixed32_32 modulation_time;
+ struct fixed31_32 fb_div;
+ struct fixed31_32 ss_amount;
+ struct fixed31_32 ss_nslip_amount;
+ struct fixed31_32 ss_ds_frac_amount;
+ struct fixed31_32 ss_step_size;
+ struct fixed31_32 modulation_time;
if (ds_data == NULL)
return false;
@@ -729,42 +731,42 @@ static bool calculate_ss(
/* compute SS_AMOUNT_FBDIV & SS_AMOUNT_NFRAC_SLIP & SS_AMOUNT_DSFRAC*/
/* 6 decimal point support in fractional feedback divider */
- fb_div = dal_fixed32_32_from_fraction(
+ fb_div = dc_fixpt_from_fraction(
pll_settings->fract_feedback_divider, 1000000);
- fb_div = dal_fixed32_32_add_int(fb_div, pll_settings->feedback_divider);
+ fb_div = dc_fixpt_add_int(fb_div, pll_settings->feedback_divider);
ds_data->ds_frac_amount = 0;
/*spreadSpectrumPercentage is in the unit of .01%,
* so have to divided by 100 * 100*/
- ss_amount = dal_fixed32_32_mul(
- fb_div, dal_fixed32_32_from_fraction(ss_data->percentage,
+ ss_amount = dc_fixpt_mul(
+ fb_div, dc_fixpt_from_fraction(ss_data->percentage,
100 * ss_data->percentage_divider));
- ds_data->feedback_amount = dal_fixed32_32_floor(ss_amount);
+ ds_data->feedback_amount = dc_fixpt_floor(ss_amount);
- ss_nslip_amount = dal_fixed32_32_sub(ss_amount,
- dal_fixed32_32_from_int(ds_data->feedback_amount));
- ss_nslip_amount = dal_fixed32_32_mul_int(ss_nslip_amount, 10);
- ds_data->nfrac_amount = dal_fixed32_32_floor(ss_nslip_amount);
+ ss_nslip_amount = dc_fixpt_sub(ss_amount,
+ dc_fixpt_from_int(ds_data->feedback_amount));
+ ss_nslip_amount = dc_fixpt_mul_int(ss_nslip_amount, 10);
+ ds_data->nfrac_amount = dc_fixpt_floor(ss_nslip_amount);
- ss_ds_frac_amount = dal_fixed32_32_sub(ss_nslip_amount,
- dal_fixed32_32_from_int(ds_data->nfrac_amount));
- ss_ds_frac_amount = dal_fixed32_32_mul_int(ss_ds_frac_amount, 65536);
- ds_data->ds_frac_amount = dal_fixed32_32_floor(ss_ds_frac_amount);
+ ss_ds_frac_amount = dc_fixpt_sub(ss_nslip_amount,
+ dc_fixpt_from_int(ds_data->nfrac_amount));
+ ss_ds_frac_amount = dc_fixpt_mul_int(ss_ds_frac_amount, 65536);
+ ds_data->ds_frac_amount = dc_fixpt_floor(ss_ds_frac_amount);
/* compute SS_STEP_SIZE_DSFRAC */
- modulation_time = dal_fixed32_32_from_fraction(
+ modulation_time = dc_fixpt_from_fraction(
pll_settings->reference_freq * 1000,
pll_settings->reference_divider * ss_data->modulation_freq_hz);
if (ss_data->flags.CENTER_SPREAD)
- modulation_time = dal_fixed32_32_div_int(modulation_time, 4);
+ modulation_time = dc_fixpt_div_int(modulation_time, 4);
else
- modulation_time = dal_fixed32_32_div_int(modulation_time, 2);
+ modulation_time = dc_fixpt_div_int(modulation_time, 2);
- ss_step_size = dal_fixed32_32_div(ss_amount, modulation_time);
+ ss_step_size = dc_fixpt_div(ss_amount, modulation_time);
/* SS_STEP_SIZE_DSFRAC_DEC = Int(SS_STEP_SIZE * 2 ^ 16 * 10)*/
- ss_step_size = dal_fixed32_32_mul_int(ss_step_size, 65536 * 10);
- ds_data->ds_frac_size = dal_fixed32_32_floor(ss_step_size);
+ ss_step_size = dc_fixpt_mul_int(ss_step_size, 65536 * 10);
+ ds_data->ds_frac_size = dc_fixpt_floor(ss_step_size);
return true;
}
@@ -978,6 +980,7 @@ static bool dce110_program_pix_clk(
break;
case DCE_VERSION_11_2:
+ case DCE_VERSION_11_22:
case DCE_VERSION_12_0:
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
case DCN_VERSION_1_0:
@@ -1054,7 +1057,7 @@ static void get_ss_info_from_atombios(
struct spread_spectrum_info *ss_info_cur;
struct spread_spectrum_data *ss_data_cur;
uint32_t i;
- struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll;
+ DC_LOGGER_INIT();
if (ss_entries_num == NULL) {
DC_LOG_SYNC(
"Invalid entry !!!\n");
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c
index 78e6beb6cf26..8a581c67bf2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c
@@ -26,7 +26,7 @@
#include "dce_clocks.h"
#include "dm_services.h"
#include "reg_helper.h"
-#include "fixed32_32.h"
+#include "fixed31_32.h"
#include "bios_parser_interface.h"
#include "dc.h"
#include "dmcu.h"
@@ -35,7 +35,7 @@
#endif
#include "core_types.h"
#include "dc_types.h"
-
+#include "dal_asic_id.h"
#define TO_DCE_CLOCKS(clocks)\
container_of(clocks, struct dce_disp_clk, base)
@@ -228,19 +228,19 @@ static int dce_clocks_get_dp_ref_freq(struct display_clock *clk)
generated according to average value (case as with previous ASICs)
*/
if (clk_dce->ss_on_dprefclk && clk_dce->dprefclk_ss_divider != 0) {
- struct fixed32_32 ss_percentage = dal_fixed32_32_div_int(
- dal_fixed32_32_from_fraction(
+ struct fixed31_32 ss_percentage = dc_fixpt_div_int(
+ dc_fixpt_from_fraction(
clk_dce->dprefclk_ss_percentage,
clk_dce->dprefclk_ss_divider), 200);
- struct fixed32_32 adj_dp_ref_clk_khz;
+ struct fixed31_32 adj_dp_ref_clk_khz;
- ss_percentage = dal_fixed32_32_sub(dal_fixed32_32_one,
+ ss_percentage = dc_fixpt_sub(dc_fixpt_one,
ss_percentage);
adj_dp_ref_clk_khz =
- dal_fixed32_32_mul_int(
+ dc_fixpt_mul_int(
ss_percentage,
dp_ref_clk_khz);
- dp_ref_clk_khz = dal_fixed32_32_floor(adj_dp_ref_clk_khz);
+ dp_ref_clk_khz = dc_fixpt_floor(adj_dp_ref_clk_khz);
}
return dp_ref_clk_khz;
@@ -256,19 +256,19 @@ static int dce_clocks_get_dp_ref_freq_wrkaround(struct display_clock *clk)
int dp_ref_clk_khz = 600000;
if (clk_dce->ss_on_dprefclk && clk_dce->dprefclk_ss_divider != 0) {
- struct fixed32_32 ss_percentage = dal_fixed32_32_div_int(
- dal_fixed32_32_from_fraction(
+ struct fixed31_32 ss_percentage = dc_fixpt_div_int(
+ dc_fixpt_from_fraction(
clk_dce->dprefclk_ss_percentage,
clk_dce->dprefclk_ss_divider), 200);
- struct fixed32_32 adj_dp_ref_clk_khz;
+ struct fixed31_32 adj_dp_ref_clk_khz;
- ss_percentage = dal_fixed32_32_sub(dal_fixed32_32_one,
+ ss_percentage = dc_fixpt_sub(dc_fixpt_one,
ss_percentage);
adj_dp_ref_clk_khz =
- dal_fixed32_32_mul_int(
+ dc_fixpt_mul_int(
ss_percentage,
dp_ref_clk_khz);
- dp_ref_clk_khz = dal_fixed32_32_floor(adj_dp_ref_clk_khz);
+ dp_ref_clk_khz = dc_fixpt_floor(adj_dp_ref_clk_khz);
}
return dp_ref_clk_khz;
@@ -413,9 +413,12 @@ static int dce112_set_clock(
/*VBIOS will determine DPREFCLK frequency, so we don't set it*/
dce_clk_params.target_clock_frequency = 0;
dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK;
- dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK =
+ if (!ASICREV_IS_VEGA20_P(clk->ctx->asic_id.hw_internal_rev))
+ dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK =
(dce_clk_params.pll_id ==
CLOCK_SOURCE_COMBO_DISPLAY_PLL0);
+ else
+ dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false;
bp->funcs->set_dce_clock(bp, &dce_clk_params);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
index 2ee3d9bf1062..a576b8bbb3cd 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
@@ -28,7 +28,7 @@
#include "dce_dmcu.h"
#include "dm_services.h"
#include "reg_helper.h"
-#include "fixed32_32.h"
+#include "fixed31_32.h"
#include "dc.h"
#define TO_DCE_DMCU(dmcu)\
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c
index 487724345d9d..0275d6d60da4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c
@@ -53,7 +53,8 @@ void dce_pipe_control_lock(struct dc *dc,
struct dce_hwseq *hws = dc->hwseq;
/* Not lock pipe when blank */
- if (lock && pipe->stream_res.tg->funcs->is_blanked(pipe->stream_res.tg))
+ if (lock && pipe->stream_res.tg->funcs->is_blanked &&
+ pipe->stream_res.tg->funcs->is_blanked(pipe->stream_res.tg))
return;
val = REG_GET_4(BLND_V_UPDATE_LOCK[pipe->stream_res.tg->inst],
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c b/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
index d737e911971b..5d9506b3d46b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
@@ -195,13 +195,13 @@ static void dce_ipp_program_input_lut(
for (i = 0; i < gamma->num_entries; i++) {
REG_SET(DC_LUT_SEQ_COLOR, 0, DC_LUT_SEQ_COLOR,
- dal_fixed31_32_round(
+ dc_fixpt_round(
gamma->entries.red[i]));
REG_SET(DC_LUT_SEQ_COLOR, 0, DC_LUT_SEQ_COLOR,
- dal_fixed31_32_round(
+ dc_fixpt_round(
gamma->entries.green[i]));
REG_SET(DC_LUT_SEQ_COLOR, 0, DC_LUT_SEQ_COLOR,
- dal_fixed31_32_round(
+ dc_fixpt_round(
gamma->entries.blue[i]));
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
index 8167cad7bcf7..dbe3b26b6d9e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
@@ -113,6 +113,7 @@ static const struct link_encoder_funcs dce110_lnk_enc_funcs = {
.connect_dig_be_to_fe = dce110_link_encoder_connect_dig_be_to_fe,
.enable_hpd = dce110_link_encoder_enable_hpd,
.disable_hpd = dce110_link_encoder_disable_hpd,
+ .is_dig_enabled = dce110_is_dig_enabled,
.destroy = dce110_link_encoder_destroy
};
@@ -535,8 +536,9 @@ void dce110_psr_program_secondary_packet(struct link_encoder *enc,
DP_SEC_GSP0_PRIORITY, 1);
}
-static bool is_dig_enabled(const struct dce110_link_encoder *enc110)
+bool dce110_is_dig_enabled(struct link_encoder *enc)
{
+ struct dce110_link_encoder *enc110 = TO_DCE110_LINK_ENC(enc);
uint32_t value;
REG_GET(DIG_BE_EN_CNTL, DIG_ENABLE, &value);
@@ -1031,7 +1033,7 @@ void dce110_link_encoder_disable_output(
struct bp_transmitter_control cntl = { 0 };
enum bp_result result;
- if (!is_dig_enabled(enc110)) {
+ if (!dce110_is_dig_enabled(enc)) {
/* OF_SKIP_POWER_DOWN_INACTIVE_ENCODER */
return;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
index 0ec3433d34b6..347069461a22 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
@@ -263,4 +263,6 @@ void dce110_psr_program_dp_dphy_fast_training(struct link_encoder *enc,
void dce110_psr_program_secondary_packet(struct link_encoder *enc,
unsigned int sdp_transmit_line_num_deadline);
+bool dce110_is_dig_enabled(struct link_encoder *enc);
+
#endif /* __DC_LINK_ENCODER__DCE110_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
index 0790f25c7b3b..b235a75355b8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
@@ -174,6 +174,25 @@ static void program_urgency_watermark(
URGENCY_HIGH_WATERMARK, urgency_high_wm);
}
+static void dce120_program_urgency_watermark(
+ struct dce_mem_input *dce_mi,
+ uint32_t wm_select,
+ uint32_t urgency_low_wm,
+ uint32_t urgency_high_wm)
+{
+ REG_UPDATE(DPG_WATERMARK_MASK_CONTROL,
+ URGENCY_WATERMARK_MASK, wm_select);
+
+ REG_SET_2(DPG_PIPE_URGENCY_CONTROL, 0,
+ URGENCY_LOW_WATERMARK, urgency_low_wm,
+ URGENCY_HIGH_WATERMARK, urgency_high_wm);
+
+ REG_SET_2(DPG_PIPE_URGENT_LEVEL_CONTROL, 0,
+ URGENT_LEVEL_LOW_WATERMARK, urgency_low_wm,
+ URGENT_LEVEL_HIGH_WATERMARK, urgency_high_wm);
+
+}
+
static void program_nbp_watermark(
struct dce_mem_input *dce_mi,
uint32_t wm_select,
@@ -206,6 +225,25 @@ static void program_nbp_watermark(
}
}
+static void dce120_program_stutter_watermark(
+ struct dce_mem_input *dce_mi,
+ uint32_t wm_select,
+ uint32_t stutter_mark,
+ uint32_t stutter_entry)
+{
+ REG_UPDATE(DPG_WATERMARK_MASK_CONTROL,
+ STUTTER_EXIT_SELF_REFRESH_WATERMARK_MASK, wm_select);
+
+ if (REG(DPG_PIPE_STUTTER_CONTROL2))
+ REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL2,
+ STUTTER_EXIT_SELF_REFRESH_WATERMARK, stutter_mark,
+ STUTTER_ENTER_SELF_REFRESH_WATERMARK, stutter_entry);
+ else
+ REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL,
+ STUTTER_EXIT_SELF_REFRESH_WATERMARK, stutter_mark,
+ STUTTER_ENTER_SELF_REFRESH_WATERMARK, stutter_entry);
+}
+
static void program_stutter_watermark(
struct dce_mem_input *dce_mi,
uint32_t wm_select,
@@ -225,7 +263,8 @@ static void program_stutter_watermark(
static void dce_mi_program_display_marks(
struct mem_input *mi,
struct dce_watermarks nbp,
- struct dce_watermarks stutter,
+ struct dce_watermarks stutter_exit,
+ struct dce_watermarks stutter_enter,
struct dce_watermarks urgent,
uint32_t total_dest_line_time_ns)
{
@@ -243,13 +282,14 @@ static void dce_mi_program_display_marks(
program_nbp_watermark(dce_mi, 2, nbp.a_mark); /* set a */
program_nbp_watermark(dce_mi, 1, nbp.d_mark); /* set d */
- program_stutter_watermark(dce_mi, 2, stutter.a_mark); /* set a */
- program_stutter_watermark(dce_mi, 1, stutter.d_mark); /* set d */
+ program_stutter_watermark(dce_mi, 2, stutter_exit.a_mark); /* set a */
+ program_stutter_watermark(dce_mi, 1, stutter_exit.d_mark); /* set d */
}
-static void dce120_mi_program_display_marks(struct mem_input *mi,
+static void dce112_mi_program_display_marks(struct mem_input *mi,
struct dce_watermarks nbp,
- struct dce_watermarks stutter,
+ struct dce_watermarks stutter_exit,
+ struct dce_watermarks stutter_entry,
struct dce_watermarks urgent,
uint32_t total_dest_line_time_ns)
{
@@ -273,10 +313,43 @@ static void dce120_mi_program_display_marks(struct mem_input *mi,
program_nbp_watermark(dce_mi, 2, nbp.c_mark); /* set c */
program_nbp_watermark(dce_mi, 3, nbp.d_mark); /* set d */
- program_stutter_watermark(dce_mi, 0, stutter.a_mark); /* set a */
- program_stutter_watermark(dce_mi, 1, stutter.b_mark); /* set b */
- program_stutter_watermark(dce_mi, 2, stutter.c_mark); /* set c */
- program_stutter_watermark(dce_mi, 3, stutter.d_mark); /* set d */
+ program_stutter_watermark(dce_mi, 0, stutter_exit.a_mark); /* set a */
+ program_stutter_watermark(dce_mi, 1, stutter_exit.b_mark); /* set b */
+ program_stutter_watermark(dce_mi, 2, stutter_exit.c_mark); /* set c */
+ program_stutter_watermark(dce_mi, 3, stutter_exit.d_mark); /* set d */
+}
+
+static void dce120_mi_program_display_marks(struct mem_input *mi,
+ struct dce_watermarks nbp,
+ struct dce_watermarks stutter_exit,
+ struct dce_watermarks stutter_entry,
+ struct dce_watermarks urgent,
+ uint32_t total_dest_line_time_ns)
+{
+ struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi);
+ uint32_t stutter_en = mi->ctx->dc->debug.disable_stutter ? 0 : 1;
+
+ dce120_program_urgency_watermark(dce_mi, 0, /* set a */
+ urgent.a_mark, total_dest_line_time_ns);
+ dce120_program_urgency_watermark(dce_mi, 1, /* set b */
+ urgent.b_mark, total_dest_line_time_ns);
+ dce120_program_urgency_watermark(dce_mi, 2, /* set c */
+ urgent.c_mark, total_dest_line_time_ns);
+ dce120_program_urgency_watermark(dce_mi, 3, /* set d */
+ urgent.d_mark, total_dest_line_time_ns);
+
+ REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL,
+ STUTTER_ENABLE, stutter_en,
+ STUTTER_IGNORE_FBC, 1);
+ program_nbp_watermark(dce_mi, 0, nbp.a_mark); /* set a */
+ program_nbp_watermark(dce_mi, 1, nbp.b_mark); /* set b */
+ program_nbp_watermark(dce_mi, 2, nbp.c_mark); /* set c */
+ program_nbp_watermark(dce_mi, 3, nbp.d_mark); /* set d */
+
+ dce120_program_stutter_watermark(dce_mi, 0, stutter_exit.a_mark, stutter_entry.a_mark); /* set a */
+ dce120_program_stutter_watermark(dce_mi, 1, stutter_exit.b_mark, stutter_entry.b_mark); /* set b */
+ dce120_program_stutter_watermark(dce_mi, 2, stutter_exit.c_mark, stutter_entry.c_mark); /* set c */
+ dce120_program_stutter_watermark(dce_mi, 3, stutter_exit.d_mark, stutter_entry.d_mark); /* set d */
}
static void program_tiling(
@@ -696,5 +769,17 @@ void dce112_mem_input_construct(
const struct dce_mem_input_mask *mi_mask)
{
dce_mem_input_construct(dce_mi, ctx, inst, regs, mi_shift, mi_mask);
+ dce_mi->base.funcs->mem_input_program_display_marks = dce112_mi_program_display_marks;
+}
+
+void dce120_mem_input_construct(
+ struct dce_mem_input *dce_mi,
+ struct dc_context *ctx,
+ int inst,
+ const struct dce_mem_input_registers *regs,
+ const struct dce_mem_input_shift *mi_shift,
+ const struct dce_mem_input_mask *mi_mask)
+{
+ dce_mem_input_construct(dce_mi, ctx, inst, regs, mi_shift, mi_mask);
dce_mi->base.funcs->mem_input_program_display_marks = dce120_mi_program_display_marks;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
index 05d39c0cbe87..d15b0d7f47fc 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
@@ -106,6 +106,7 @@ struct dce_mem_input_registers {
uint32_t DPG_PIPE_ARBITRATION_CONTROL1;
uint32_t DPG_WATERMARK_MASK_CONTROL;
uint32_t DPG_PIPE_URGENCY_CONTROL;
+ uint32_t DPG_PIPE_URGENT_LEVEL_CONTROL;
uint32_t DPG_PIPE_NB_PSTATE_CHANGE_CONTROL;
uint32_t DPG_PIPE_LOW_POWER_CONTROL;
uint32_t DPG_PIPE_STUTTER_CONTROL;
@@ -213,6 +214,11 @@ struct dce_mem_input_registers {
#define MI_DCE12_DMIF_PG_MASK_SH_LIST(mask_sh, blk)\
SFB(blk, DPG_PIPE_STUTTER_CONTROL2, STUTTER_EXIT_SELF_REFRESH_WATERMARK, mask_sh),\
+ SFB(blk, DPG_PIPE_STUTTER_CONTROL2, STUTTER_ENTER_SELF_REFRESH_WATERMARK, mask_sh),\
+ SFB(blk, DPG_PIPE_URGENT_LEVEL_CONTROL, URGENT_LEVEL_LOW_WATERMARK, mask_sh),\
+ SFB(blk, DPG_PIPE_URGENT_LEVEL_CONTROL, URGENT_LEVEL_HIGH_WATERMARK, mask_sh),\
+ SFB(blk, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, mask_sh),\
+ SFB(blk, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, mask_sh),\
SFB(blk, DPG_WATERMARK_MASK_CONTROL, PSTATE_CHANGE_WATERMARK_MASK, mask_sh),\
SFB(blk, DPG_PIPE_LOW_POWER_CONTROL, PSTATE_CHANGE_ENABLE, mask_sh),\
SFB(blk, DPG_PIPE_LOW_POWER_CONTROL, PSTATE_CHANGE_URGENT_DURING_REQUEST, mask_sh),\
@@ -286,6 +292,8 @@ struct dce_mem_input_registers {
type STUTTER_EXIT_SELF_REFRESH_WATERMARK_MASK; \
type URGENCY_LOW_WATERMARK; \
type URGENCY_HIGH_WATERMARK; \
+ type URGENT_LEVEL_LOW_WATERMARK;\
+ type URGENT_LEVEL_HIGH_WATERMARK;\
type NB_PSTATE_CHANGE_ENABLE; \
type NB_PSTATE_CHANGE_URGENT_DURING_REQUEST; \
type NB_PSTATE_CHANGE_NOT_SELF_REFRESH_DURING_REQUEST; \
@@ -297,6 +305,7 @@ struct dce_mem_input_registers {
type STUTTER_ENABLE; \
type STUTTER_IGNORE_FBC; \
type STUTTER_EXIT_SELF_REFRESH_WATERMARK; \
+ type STUTTER_ENTER_SELF_REFRESH_WATERMARK; \
type DMIF_BUFFERS_ALLOCATED; \
type DMIF_BUFFERS_ALLOCATION_COMPLETED; \
type ENABLE; /* MC_HUB_RDREQ_DMIF_LIMIT */\
@@ -344,4 +353,12 @@ void dce112_mem_input_construct(
const struct dce_mem_input_shift *mi_shift,
const struct dce_mem_input_mask *mi_mask);
+void dce120_mem_input_construct(
+ struct dce_mem_input *dce_mi,
+ struct dc_context *ctx,
+ int inst,
+ const struct dce_mem_input_registers *regs,
+ const struct dce_mem_input_shift *mi_shift,
+ const struct dce_mem_input_mask *mi_mask);
+
#endif /*__DCE_MEM_INPUT_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c b/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c
index 6243450b41b7..48862bebf29e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c
@@ -1014,11 +1014,11 @@ static const uint16_t filter_8tap_64p_183[264] = {
const uint16_t *get_filter_3tap_16p(struct fixed31_32 ratio)
{
- if (ratio.value < dal_fixed31_32_one.value)
+ if (ratio.value < dc_fixpt_one.value)
return filter_3tap_16p_upscale;
- else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
return filter_3tap_16p_117;
- else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
return filter_3tap_16p_150;
else
return filter_3tap_16p_183;
@@ -1026,11 +1026,11 @@ const uint16_t *get_filter_3tap_16p(struct fixed31_32 ratio)
const uint16_t *get_filter_3tap_64p(struct fixed31_32 ratio)
{
- if (ratio.value < dal_fixed31_32_one.value)
+ if (ratio.value < dc_fixpt_one.value)
return filter_3tap_64p_upscale;
- else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
return filter_3tap_64p_117;
- else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
return filter_3tap_64p_150;
else
return filter_3tap_64p_183;
@@ -1038,11 +1038,11 @@ const uint16_t *get_filter_3tap_64p(struct fixed31_32 ratio)
const uint16_t *get_filter_4tap_16p(struct fixed31_32 ratio)
{
- if (ratio.value < dal_fixed31_32_one.value)
+ if (ratio.value < dc_fixpt_one.value)
return filter_4tap_16p_upscale;
- else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
return filter_4tap_16p_117;
- else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
return filter_4tap_16p_150;
else
return filter_4tap_16p_183;
@@ -1050,11 +1050,11 @@ const uint16_t *get_filter_4tap_16p(struct fixed31_32 ratio)
const uint16_t *get_filter_4tap_64p(struct fixed31_32 ratio)
{
- if (ratio.value < dal_fixed31_32_one.value)
+ if (ratio.value < dc_fixpt_one.value)
return filter_4tap_64p_upscale;
- else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
return filter_4tap_64p_117;
- else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
return filter_4tap_64p_150;
else
return filter_4tap_64p_183;
@@ -1062,11 +1062,11 @@ const uint16_t *get_filter_4tap_64p(struct fixed31_32 ratio)
const uint16_t *get_filter_5tap_64p(struct fixed31_32 ratio)
{
- if (ratio.value < dal_fixed31_32_one.value)
+ if (ratio.value < dc_fixpt_one.value)
return filter_5tap_64p_upscale;
- else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
return filter_5tap_64p_117;
- else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
return filter_5tap_64p_150;
else
return filter_5tap_64p_183;
@@ -1074,11 +1074,11 @@ const uint16_t *get_filter_5tap_64p(struct fixed31_32 ratio)
const uint16_t *get_filter_6tap_64p(struct fixed31_32 ratio)
{
- if (ratio.value < dal_fixed31_32_one.value)
+ if (ratio.value < dc_fixpt_one.value)
return filter_6tap_64p_upscale;
- else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
return filter_6tap_64p_117;
- else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
return filter_6tap_64p_150;
else
return filter_6tap_64p_183;
@@ -1086,11 +1086,11 @@ const uint16_t *get_filter_6tap_64p(struct fixed31_32 ratio)
const uint16_t *get_filter_7tap_64p(struct fixed31_32 ratio)
{
- if (ratio.value < dal_fixed31_32_one.value)
+ if (ratio.value < dc_fixpt_one.value)
return filter_7tap_64p_upscale;
- else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
return filter_7tap_64p_117;
- else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
return filter_7tap_64p_150;
else
return filter_7tap_64p_183;
@@ -1098,11 +1098,11 @@ const uint16_t *get_filter_7tap_64p(struct fixed31_32 ratio)
const uint16_t *get_filter_8tap_64p(struct fixed31_32 ratio)
{
- if (ratio.value < dal_fixed31_32_one.value)
+ if (ratio.value < dc_fixpt_one.value)
return filter_8tap_64p_upscale;
- else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
return filter_8tap_64p_117;
- else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+ else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
return filter_8tap_64p_150;
else
return filter_8tap_64p_183;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index 162f6a6c4208..0a6d483dc046 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -26,27 +26,10 @@
#include "dc_bios_types.h"
#include "dce_stream_encoder.h"
#include "reg_helper.h"
+#include "hw_shared.h"
+
#define DC_LOGGER \
enc110->base.ctx->logger
-enum DP_PIXEL_ENCODING {
-DP_PIXEL_ENCODING_RGB444 = 0x00000000,
-DP_PIXEL_ENCODING_YCBCR422 = 0x00000001,
-DP_PIXEL_ENCODING_YCBCR444 = 0x00000002,
-DP_PIXEL_ENCODING_RGB_WIDE_GAMUT = 0x00000003,
-DP_PIXEL_ENCODING_Y_ONLY = 0x00000004,
-DP_PIXEL_ENCODING_YCBCR420 = 0x00000005,
-DP_PIXEL_ENCODING_RESERVED = 0x00000006,
-};
-
-
-enum DP_COMPONENT_DEPTH {
-DP_COMPONENT_DEPTH_6BPC = 0x00000000,
-DP_COMPONENT_DEPTH_8BPC = 0x00000001,
-DP_COMPONENT_DEPTH_10BPC = 0x00000002,
-DP_COMPONENT_DEPTH_12BPC = 0x00000003,
-DP_COMPONENT_DEPTH_16BPC = 0x00000004,
-DP_COMPONENT_DEPTH_RESERVED = 0x00000005,
-};
#define REG(reg)\
@@ -80,7 +63,7 @@ enum {
static void dce110_update_generic_info_packet(
struct dce110_stream_encoder *enc110,
uint32_t packet_index,
- const struct encoder_info_packet *info_packet)
+ const struct dc_info_packet *info_packet)
{
uint32_t regval;
/* TODOFPGA Figure out a proper number for max_retries polling for lock
@@ -196,7 +179,7 @@ static void dce110_update_generic_info_packet(
static void dce110_update_hdmi_info_packet(
struct dce110_stream_encoder *enc110,
uint32_t packet_index,
- const struct encoder_info_packet *info_packet)
+ const struct dc_info_packet *info_packet)
{
uint32_t cont, send, line;
@@ -314,11 +297,11 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
switch (crtc_timing->pixel_encoding) {
case PIXEL_ENCODING_YCBCR422:
REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
- DP_PIXEL_ENCODING_YCBCR422);
+ DP_PIXEL_ENCODING_TYPE_YCBCR422);
break;
case PIXEL_ENCODING_YCBCR444:
REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
- DP_PIXEL_ENCODING_YCBCR444);
+ DP_PIXEL_ENCODING_TYPE_YCBCR444);
if (crtc_timing->flags.Y_ONLY)
if (crtc_timing->display_color_depth != COLOR_DEPTH_666)
@@ -326,7 +309,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
* Color depth of Y-only could be
* 8, 10, 12, 16 bits */
REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
- DP_PIXEL_ENCODING_Y_ONLY);
+ DP_PIXEL_ENCODING_TYPE_Y_ONLY);
/* Note: DP_MSA_MISC1 bit 7 is the indicator
* of Y-only mode.
* This bit is set in HW if register
@@ -334,7 +317,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
break;
case PIXEL_ENCODING_YCBCR420:
REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
- DP_PIXEL_ENCODING_YCBCR420);
+ DP_PIXEL_ENCODING_TYPE_YCBCR420);
if (enc110->se_mask->DP_VID_M_DOUBLE_VALUE_EN)
REG_UPDATE(DP_VID_TIMING, DP_VID_M_DOUBLE_VALUE_EN, 1);
@@ -345,7 +328,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
break;
default:
REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
- DP_PIXEL_ENCODING_RGB444);
+ DP_PIXEL_ENCODING_TYPE_RGB444);
break;
}
@@ -363,20 +346,20 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
break;
case COLOR_DEPTH_888:
REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
- DP_COMPONENT_DEPTH_8BPC);
+ DP_COMPONENT_PIXEL_DEPTH_8BPC);
break;
case COLOR_DEPTH_101010:
REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
- DP_COMPONENT_DEPTH_10BPC);
+ DP_COMPONENT_PIXEL_DEPTH_10BPC);
break;
case COLOR_DEPTH_121212:
REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
- DP_COMPONENT_DEPTH_12BPC);
+ DP_COMPONENT_PIXEL_DEPTH_12BPC);
break;
default:
REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
- DP_COMPONENT_DEPTH_6BPC);
+ DP_COMPONENT_PIXEL_DEPTH_6BPC);
break;
}
@@ -700,11 +683,11 @@ static void dce110_stream_encoder_set_mst_bandwidth(
struct fixed31_32 avg_time_slots_per_mtp)
{
struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
- uint32_t x = dal_fixed31_32_floor(
+ uint32_t x = dc_fixpt_floor(
avg_time_slots_per_mtp);
- uint32_t y = dal_fixed31_32_ceil(
- dal_fixed31_32_shl(
- dal_fixed31_32_sub_int(
+ uint32_t y = dc_fixpt_ceil(
+ dc_fixpt_shl(
+ dc_fixpt_sub_int(
avg_time_slots_per_mtp,
x),
26));
@@ -836,7 +819,7 @@ static void dce110_stream_encoder_update_dp_info_packets(
const struct encoder_info_frame *info_frame)
{
struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
- uint32_t value = REG_READ(DP_SEC_CNTL);
+ uint32_t value = 0;
if (info_frame->vsc.valid)
dce110_update_generic_info_packet(
@@ -870,6 +853,7 @@ static void dce110_stream_encoder_update_dp_info_packets(
* Therefore we need to enable master bit
* if at least on of the fields is not 0
*/
+ value = REG_READ(DP_SEC_CNTL);
if (value)
REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
}
@@ -879,7 +863,7 @@ static void dce110_stream_encoder_stop_dp_info_packets(
{
/* stop generic packets on DP */
struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
- uint32_t value = REG_READ(DP_SEC_CNTL);
+ uint32_t value = 0;
if (enc110->se_mask->DP_SEC_AVI_ENABLE) {
REG_SET_7(DP_SEC_CNTL, 0,
@@ -892,25 +876,10 @@ static void dce110_stream_encoder_stop_dp_info_packets(
DP_SEC_STREAM_ENABLE, 0);
}
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
- if (enc110->se_mask->DP_SEC_GSP7_ENABLE) {
- REG_SET_10(DP_SEC_CNTL, 0,
- DP_SEC_GSP0_ENABLE, 0,
- DP_SEC_GSP1_ENABLE, 0,
- DP_SEC_GSP2_ENABLE, 0,
- DP_SEC_GSP3_ENABLE, 0,
- DP_SEC_GSP4_ENABLE, 0,
- DP_SEC_GSP5_ENABLE, 0,
- DP_SEC_GSP6_ENABLE, 0,
- DP_SEC_GSP7_ENABLE, 0,
- DP_SEC_MPG_ENABLE, 0,
- DP_SEC_STREAM_ENABLE, 0);
- }
-#endif
/* this register shared with audio info frame.
* therefore we need to keep master enabled
* if at least one of the fields is not 0 */
-
+ value = REG_READ(DP_SEC_CNTL);
if (value)
REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
@@ -1513,7 +1482,7 @@ static void dce110_se_disable_dp_audio(
struct stream_encoder *enc)
{
struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
- uint32_t value = REG_READ(DP_SEC_CNTL);
+ uint32_t value = 0;
/* Disable Audio packets */
REG_UPDATE_5(DP_SEC_CNTL,
@@ -1525,6 +1494,7 @@ static void dce110_se_disable_dp_audio(
/* This register shared with encoder info frame. Therefore we need to
keep master enabled if at least on of the fields is not 0 */
+ value = REG_READ(DP_SEC_CNTL);
if (value != 0)
REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
index 832c5daada35..a02e719d7794 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
@@ -41,7 +41,7 @@
#define DC_LOGGER \
xfm_dce->base.ctx->logger
-#define IDENTITY_RATIO(ratio) (dal_fixed31_32_u2d19(ratio) == (1 << 19))
+#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19))
#define GAMUT_MATRIX_SIZE 12
#define SCL_PHASES 16
@@ -256,27 +256,27 @@ static void calculate_inits(
struct fixed31_32 v_init;
inits->h_int_scale_ratio =
- dal_fixed31_32_u2d19(data->ratios.horz) << 5;
+ dc_fixpt_u2d19(data->ratios.horz) << 5;
inits->v_int_scale_ratio =
- dal_fixed31_32_u2d19(data->ratios.vert) << 5;
+ dc_fixpt_u2d19(data->ratios.vert) << 5;
h_init =
- dal_fixed31_32_div_int(
- dal_fixed31_32_add(
+ dc_fixpt_div_int(
+ dc_fixpt_add(
data->ratios.horz,
- dal_fixed31_32_from_int(data->taps.h_taps + 1)),
+ dc_fixpt_from_int(data->taps.h_taps + 1)),
2);
- inits->h_init.integer = dal_fixed31_32_floor(h_init);
- inits->h_init.fraction = dal_fixed31_32_u0d19(h_init) << 5;
+ inits->h_init.integer = dc_fixpt_floor(h_init);
+ inits->h_init.fraction = dc_fixpt_u0d19(h_init) << 5;
v_init =
- dal_fixed31_32_div_int(
- dal_fixed31_32_add(
+ dc_fixpt_div_int(
+ dc_fixpt_add(
data->ratios.vert,
- dal_fixed31_32_from_int(data->taps.v_taps + 1)),
+ dc_fixpt_from_int(data->taps.v_taps + 1)),
2);
- inits->v_init.integer = dal_fixed31_32_floor(v_init);
- inits->v_init.fraction = dal_fixed31_32_u0d19(v_init) << 5;
+ inits->v_init.integer = dc_fixpt_floor(v_init);
+ inits->v_init.fraction = dc_fixpt_u0d19(v_init) << 5;
}
static void program_scl_ratios_inits(
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
index 3092f76bdb75..38ec0d609297 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
@@ -733,38 +733,6 @@ enum dc_status dce100_add_stream_to_ctx(
return result;
}
-enum dc_status dce100_validate_guaranteed(
- struct dc *dc,
- struct dc_stream_state *dc_stream,
- struct dc_state *context)
-{
- enum dc_status result = DC_ERROR_UNEXPECTED;
-
- context->streams[0] = dc_stream;
- dc_stream_retain(context->streams[0]);
- context->stream_count++;
-
- result = resource_map_pool_resources(dc, context, dc_stream);
-
- if (result == DC_OK)
- result = resource_map_clock_resources(dc, context, dc_stream);
-
- if (result == DC_OK)
- result = build_mapped_resource(dc, context, dc_stream);
-
- if (result == DC_OK) {
- validate_guaranteed_copy_streams(
- context, dc->caps.max_streams);
- result = resource_build_scaling_params_for_context(dc, context);
- }
-
- if (result == DC_OK)
- if (!dce100_validate_bandwidth(dc, context))
- result = DC_FAIL_BANDWIDTH_VALIDATE;
-
- return result;
-}
-
static void dce100_destroy_resource_pool(struct resource_pool **pool)
{
struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
@@ -786,7 +754,6 @@ enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, s
static const struct resource_funcs dce100_res_pool_funcs = {
.destroy = dce100_destroy_resource_pool,
.link_enc_create = dce100_link_encoder_create,
- .validate_guaranteed = dce100_validate_guaranteed,
.validate_bandwidth = dce100_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce100_add_stream_to_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index d0575999f172..a92fb0aa2ff3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -70,8 +70,9 @@
#define CTX \
hws->ctx
-#define DC_LOGGER \
- ctx->logger
+
+#define DC_LOGGER_INIT()
+
#define REG(reg)\
hws->regs->reg
@@ -279,7 +280,9 @@ dce110_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
build_prescale_params(&prescale_params, plane_state);
ipp->funcs->ipp_program_prescale(ipp, &prescale_params);
- if (plane_state->gamma_correction && dce_use_lut(plane_state->format))
+ if (plane_state->gamma_correction &&
+ !plane_state->gamma_correction->is_identity &&
+ dce_use_lut(plane_state->format))
ipp->funcs->ipp_program_input_lut(ipp, plane_state->gamma_correction);
if (tf == NULL) {
@@ -506,19 +509,19 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
- arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
- dal_fixed31_32_from_int(region_start));
- arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
- dal_fixed31_32_from_int(region_end));
+ arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+ dc_fixpt_from_int(region_start));
+ arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+ dc_fixpt_from_int(region_end));
y_r = rgb_resulted[0].red;
y_g = rgb_resulted[0].green;
y_b = rgb_resulted[0].blue;
- y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b));
+ y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b));
arr_points[0].y = y1_min;
- arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y,
+ arr_points[0].slope = dc_fixpt_div(arr_points[0].y,
arr_points[0].x);
y_r = rgb_resulted[hw_points - 1].red;
@@ -528,21 +531,21 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
/* see comment above, m_arrPoints[1].y should be the Y value for the
* region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
*/
- y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b));
+ y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b));
arr_points[1].y = y3_max;
- arr_points[1].slope = dal_fixed31_32_zero;
+ arr_points[1].slope = dc_fixpt_zero;
if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
/* for PQ, we want to have a straight line from last HW X point,
* and the slope to be such that we hit 1.0 at 10000 nits.
*/
- const struct fixed31_32 end_value = dal_fixed31_32_from_int(125);
+ const struct fixed31_32 end_value = dc_fixpt_from_int(125);
- arr_points[1].slope = dal_fixed31_32_div(
- dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y),
- dal_fixed31_32_sub(end_value, arr_points[1].x));
+ arr_points[1].slope = dc_fixpt_div(
+ dc_fixpt_sub(dc_fixpt_one, arr_points[1].y),
+ dc_fixpt_sub(end_value, arr_points[1].x));
}
regamma_params->hw_points_num = hw_points;
@@ -566,16 +569,16 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
i = 1;
while (i != hw_points + 1) {
- if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red))
+ if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
rgb_plus_1->red = rgb->red;
- if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green))
+ if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
rgb_plus_1->green = rgb->green;
- if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue))
+ if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
rgb_plus_1->blue = rgb->blue;
- rgb->delta_red = dal_fixed31_32_sub(rgb_plus_1->red, rgb->red);
- rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green);
- rgb->delta_blue = dal_fixed31_32_sub(rgb_plus_1->blue, rgb->blue);
+ rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red);
+ rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
+ rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
++rgb_plus_1;
++rgb;
@@ -851,6 +854,28 @@ void hwss_edp_power_control(
if (power_up != is_panel_powered_on(hwseq)) {
/* Send VBIOS command to prompt eDP panel power */
+ if (power_up) {
+ unsigned long long current_ts = dm_get_timestamp(ctx);
+ unsigned long long duration_in_ms =
+ dm_get_elapse_time_in_ns(
+ ctx,
+ current_ts,
+ div64_u64(link->link_trace.time_stamp.edp_poweroff, 1000000));
+ unsigned long long wait_time_ms = 0;
+
+ /* max 500ms from LCDVDD off to on */
+ if (link->link_trace.time_stamp.edp_poweroff == 0)
+ wait_time_ms = 500;
+ else if (duration_in_ms < 500)
+ wait_time_ms = 500 - duration_in_ms;
+
+ if (wait_time_ms) {
+ msleep(wait_time_ms);
+ dm_output_to_console("%s: wait %lld ms to power on eDP.\n",
+ __func__, wait_time_ms);
+ }
+
+ }
DC_LOG_HW_RESUME_S3(
"%s: Panel Power action: %s\n",
@@ -864,9 +889,14 @@ void hwss_edp_power_control(
cntl.coherent = false;
cntl.lanes_number = LANE_COUNT_FOUR;
cntl.hpd_sel = link->link_enc->hpd_source;
-
bp_result = link_transmitter_control(ctx->dc_bios, &cntl);
+ if (!power_up)
+ /*save driver power off time stamp*/
+ link->link_trace.time_stamp.edp_poweroff = dm_get_timestamp(ctx);
+ else
+ link->link_trace.time_stamp.edp_poweron = dm_get_timestamp(ctx);
+
if (bp_result != BP_RESULT_OK)
DC_LOG_ERROR(
"%s: Panel Power bp_result: %d\n",
@@ -1011,7 +1041,7 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx,
if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
link->dc->hwss.edp_backlight_control(link, true);
- stream->bl_pwm_level = 0;
+ stream->bl_pwm_level = EDP_BACKLIGHT_RAMP_DISABLE_LEVEL;
}
}
void dce110_blank_stream(struct pipe_ctx *pipe_ctx)
@@ -1203,7 +1233,7 @@ static void program_scaler(const struct dc *dc,
&pipe_ctx->plane_res.scl_data);
}
-static enum dc_status dce110_prog_pixclk_crtc_otg(
+static enum dc_status dce110_enable_stream_timing(
struct pipe_ctx *pipe_ctx,
struct dc_state *context,
struct dc *dc)
@@ -1269,7 +1299,7 @@ static enum dc_status apply_single_controller_ctx_to_hw(
pipe_ctx[pipe_ctx->pipe_idx];
/* */
- dc->hwss.prog_pixclk_crtc_otg(pipe_ctx, context, dc);
+ dc->hwss.enable_stream_timing(pipe_ctx, context, dc);
/* FPGA does not program backend */
if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
@@ -1441,6 +1471,17 @@ static void disable_vga_and_power_gate_all_controllers(
}
}
+static struct dc_link *get_link_for_edp(struct dc *dc)
+{
+ int i;
+
+ for (i = 0; i < dc->link_count; i++) {
+ if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP)
+ return dc->links[i];
+ }
+ return NULL;
+}
+
static struct dc_link *get_link_for_edp_not_in_use(
struct dc *dc,
struct dc_state *context)
@@ -1475,20 +1516,21 @@ static struct dc_link *get_link_for_edp_not_in_use(
*/
void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
{
- struct dc_bios *dcb = dc->ctx->dc_bios;
-
- /* vbios already light up eDP, so we can leverage vbios and skip eDP
- * programming
- */
- bool can_eDP_fast_boot_optimize =
- (dcb->funcs->get_vga_enabled_displays(dc->ctx->dc_bios) == ATOM_DISPLAY_LCD1_ACTIVE);
-
- /* if OS doesn't light up eDP and eDP link is available, we want to disable */
struct dc_link *edp_link_to_turnoff = NULL;
+ struct dc_link *edp_link = get_link_for_edp(dc);
+ bool can_eDP_fast_boot_optimize = false;
+
+ if (edp_link) {
+ can_eDP_fast_boot_optimize =
+ edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc);
+ }
if (can_eDP_fast_boot_optimize) {
edp_link_to_turnoff = get_link_for_edp_not_in_use(dc, context);
+ /* if OS doesn't light up eDP and eDP link is available, we want to disable
+ * If resume from S4/S5, should optimization.
+ */
if (!edp_link_to_turnoff)
dc->apply_edp_fast_boot_optimization = true;
}
@@ -1544,6 +1586,7 @@ static void dce110_set_displaymarks(
pipe_ctx->plane_res.mi,
context->bw.dce.nbp_state_change_wm_ns[num_pipes],
context->bw.dce.stutter_exit_wm_ns[num_pipes],
+ context->bw.dce.stutter_entry_wm_ns[num_pipes],
context->bw.dce.urgent_wm_ns[num_pipes],
total_dest_line_time_ns);
if (i == underlay_idx) {
@@ -1569,6 +1612,7 @@ static void set_safe_displaymarks(
MAX_WATERMARK, MAX_WATERMARK, MAX_WATERMARK, MAX_WATERMARK };
struct dce_watermarks nbp_marks = {
SAFE_NBP_MARK, SAFE_NBP_MARK, SAFE_NBP_MARK, SAFE_NBP_MARK };
+ struct dce_watermarks min_marks = { 0, 0, 0, 0};
for (i = 0; i < MAX_PIPES; i++) {
if (res_ctx->pipe_ctx[i].stream == NULL || res_ctx->pipe_ctx[i].plane_res.mi == NULL)
@@ -1578,6 +1622,7 @@ static void set_safe_displaymarks(
res_ctx->pipe_ctx[i].plane_res.mi,
nbp_marks,
max_marks,
+ min_marks,
max_marks,
MAX_WATERMARK);
@@ -1803,6 +1848,9 @@ static bool should_enable_fbc(struct dc *dc,
}
}
+ /* Pipe context should be found */
+ ASSERT(pipe_ctx);
+
/* Only supports eDP */
if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP)
return false;
@@ -2221,74 +2269,6 @@ static void program_gamut_remap(struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust);
}
-
-/**
- * TODO REMOVE, USE UPDATE INSTEAD
- */
-static void set_plane_config(
- const struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct resource_context *res_ctx)
-{
- struct mem_input *mi = pipe_ctx->plane_res.mi;
- struct dc_plane_state *plane_state = pipe_ctx->plane_state;
- struct xfm_grph_csc_adjustment adjust;
- struct out_csc_color_matrix tbl_entry;
- unsigned int i;
-
- memset(&adjust, 0, sizeof(adjust));
- memset(&tbl_entry, 0, sizeof(tbl_entry));
- adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
-
- dce_enable_fe_clock(dc->hwseq, mi->inst, true);
-
- set_default_colors(pipe_ctx);
- if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) {
- tbl_entry.color_space =
- pipe_ctx->stream->output_color_space;
-
- for (i = 0; i < 12; i++)
- tbl_entry.regval[i] =
- pipe_ctx->stream->csc_color_matrix.matrix[i];
-
- pipe_ctx->plane_res.xfm->funcs->opp_set_csc_adjustment
- (pipe_ctx->plane_res.xfm, &tbl_entry);
- }
-
- if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
- adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
-
- for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
- adjust.temperature_matrix[i] =
- pipe_ctx->stream->gamut_remap_matrix.matrix[i];
- }
-
- pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust);
-
- pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != 0;
- program_scaler(dc, pipe_ctx);
-
- program_surface_visibility(dc, pipe_ctx);
-
- mi->funcs->mem_input_program_surface_config(
- mi,
- plane_state->format,
- &plane_state->tiling_info,
- &plane_state->plane_size,
- plane_state->rotation,
- NULL,
- false);
- if (mi->funcs->set_blank)
- mi->funcs->set_blank(mi, pipe_ctx->plane_state->visible);
-
- if (dc->config.gpu_vm_support)
- mi->funcs->mem_input_program_pte_vm(
- pipe_ctx->plane_res.mi,
- plane_state->format,
- &plane_state->tiling_info,
- plane_state->rotation);
-}
-
static void update_plane_addr(const struct dc *dc,
struct pipe_ctx *pipe_ctx)
{
@@ -2699,8 +2679,11 @@ static void dce110_program_front_end_for_pipe(
struct dc_plane_state *plane_state = pipe_ctx->plane_state;
struct xfm_grph_csc_adjustment adjust;
struct out_csc_color_matrix tbl_entry;
+#if defined(CONFIG_DRM_AMD_DC_FBC)
+ unsigned int underlay_idx = dc->res_pool->underlay_pipe_index;
+#endif
unsigned int i;
- struct dc_context *ctx = dc->ctx;
+ DC_LOGGER_INIT();
memset(&tbl_entry, 0, sizeof(tbl_entry));
if (dc->current_state)
@@ -2740,7 +2723,9 @@ static void dce110_program_front_end_for_pipe(
program_scaler(dc, pipe_ctx);
#if defined(CONFIG_DRM_AMD_DC_FBC)
- if (dc->fbc_compressor && old_pipe->stream) {
+ /* fbc not applicable on Underlay pipe */
+ if (dc->fbc_compressor && old_pipe->stream &&
+ pipe_ctx->pipe_idx != underlay_idx) {
if (plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL)
dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor);
else
@@ -2776,13 +2761,13 @@ static void dce110_program_front_end_for_pipe(
dc->hwss.set_output_transfer_func(pipe_ctx, pipe_ctx->stream);
DC_LOG_SURFACE(
- "Pipe:%d 0x%x: addr hi:0x%x, "
+ "Pipe:%d %p: addr hi:0x%x, "
"addr low:0x%x, "
"src: %d, %d, %d,"
" %d; dst: %d, %d, %d, %d;"
"clip: %d, %d, %d, %d\n",
pipe_ctx->pipe_idx,
- pipe_ctx->plane_state,
+ (void *) pipe_ctx->plane_state,
pipe_ctx->plane_state->address.grph.addr.high_part,
pipe_ctx->plane_state->address.grph.addr.low_part,
pipe_ctx->plane_state->src_rect.x,
@@ -2970,7 +2955,6 @@ static const struct hw_sequencer_funcs dce110_funcs = {
.init_hw = init_hw,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = dce110_apply_ctx_for_surface,
- .set_plane_config = set_plane_config,
.update_plane_addr = update_plane_addr,
.update_pending_status = dce110_update_pending_status,
.set_input_transfer_func = dce110_set_input_transfer_func,
@@ -2993,7 +2977,7 @@ static const struct hw_sequencer_funcs dce110_funcs = {
.get_position = get_position,
.set_static_screen_control = set_static_screen_control,
.reset_hw_ctx_wrap = dce110_reset_hw_ctx_wrap,
- .prog_pixclk_crtc_otg = dce110_prog_pixclk_crtc_otg,
+ .enable_stream_timing = dce110_enable_stream_timing,
.setup_stereo = NULL,
.set_avmute = dce110_set_avmute,
.wait_for_mpcc_disconnect = dce110_wait_for_mpcc_disconnect,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
index 7bab8c6d2a73..0564c8e31252 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
@@ -923,6 +923,7 @@ void dce_mem_input_v_program_display_marks(
struct mem_input *mem_input,
struct dce_watermarks nbp,
struct dce_watermarks stutter,
+ struct dce_watermarks stutter_enter,
struct dce_watermarks urgent,
uint32_t total_dest_line_time_ns)
{
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index b1f14be20fdf..ee33786bdef6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -930,38 +930,6 @@ static enum dc_status dce110_add_stream_to_ctx(
return result;
}
-static enum dc_status dce110_validate_guaranteed(
- struct dc *dc,
- struct dc_stream_state *dc_stream,
- struct dc_state *context)
-{
- enum dc_status result = DC_ERROR_UNEXPECTED;
-
- context->streams[0] = dc_stream;
- dc_stream_retain(context->streams[0]);
- context->stream_count++;
-
- result = resource_map_pool_resources(dc, context, dc_stream);
-
- if (result == DC_OK)
- result = resource_map_clock_resources(dc, context, dc_stream);
-
- if (result == DC_OK)
- result = build_mapped_resource(dc, context, dc_stream);
-
- if (result == DC_OK) {
- validate_guaranteed_copy_streams(
- context, dc->caps.max_streams);
- result = resource_build_scaling_params_for_context(dc, context);
- }
-
- if (result == DC_OK)
- if (!dce110_validate_bandwidth(dc, context))
- result = DC_FAIL_BANDWIDTH_VALIDATE;
-
- return result;
-}
-
static struct pipe_ctx *dce110_acquire_underlay(
struct dc_state *context,
const struct resource_pool *pool,
@@ -1036,7 +1004,6 @@ static void dce110_destroy_resource_pool(struct resource_pool **pool)
static const struct resource_funcs dce110_res_pool_funcs = {
.destroy = dce110_destroy_resource_pool,
.link_enc_create = dce110_link_encoder_create,
- .validate_guaranteed = dce110_validate_guaranteed,
.validate_bandwidth = dce110_validate_bandwidth,
.validate_plane = dce110_validate_plane,
.acquire_idle_pipe_for_layer = dce110_acquire_underlay,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
index be7153924a70..1b2fe0df347f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
@@ -431,14 +431,6 @@ void dce110_timing_generator_set_drr(
0,
CRTC_V_TOTAL_CONTROL,
CRTC_SET_V_TOTAL_MIN_MASK);
- set_reg_field_value(v_total_min,
- 0,
- CRTC_V_TOTAL_MIN,
- CRTC_V_TOTAL_MIN);
- set_reg_field_value(v_total_max,
- 0,
- CRTC_V_TOTAL_MAX,
- CRTC_V_TOTAL_MAX);
set_reg_field_value(v_total_cntl,
0,
CRTC_V_TOTAL_CONTROL,
@@ -447,6 +439,14 @@ void dce110_timing_generator_set_drr(
0,
CRTC_V_TOTAL_CONTROL,
CRTC_V_TOTAL_MAX_SEL);
+ set_reg_field_value(v_total_min,
+ 0,
+ CRTC_V_TOTAL_MIN,
+ CRTC_V_TOTAL_MIN);
+ set_reg_field_value(v_total_max,
+ 0,
+ CRTC_V_TOTAL_MAX,
+ CRTC_V_TOTAL_MAX);
set_reg_field_value(v_total_cntl,
0,
CRTC_V_TOTAL_CONTROL,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
index 8ad04816e7d3..a3cef60380ed 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
@@ -648,12 +648,6 @@ static void dce110_timing_generator_v_disable_vga(
return;
}
-static bool dce110_tg_v_is_blanked(struct timing_generator *tg)
-{
- /* Signal comes from the primary pipe, underlay is never blanked. */
- return false;
-}
-
/** ********************************************************************************************
*
* DCE11 Timing Generator Constructor / Destructor
@@ -670,7 +664,6 @@ static const struct timing_generator_funcs dce110_tg_v_funcs = {
.set_early_control = dce110_timing_generator_v_set_early_control,
.wait_for_state = dce110_timing_generator_v_wait_for_state,
.set_blank = dce110_timing_generator_v_set_blank,
- .is_blanked = dce110_tg_v_is_blanked,
.set_colors = dce110_timing_generator_v_set_colors,
.set_overscan_blank_color =
dce110_timing_generator_v_set_overscan_color_black,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
index 8ba3c12fc608..a7dce060204f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
@@ -373,13 +373,13 @@ static void calculate_inits(
struct rect *chroma_viewport)
{
inits->h_int_scale_ratio_luma =
- dal_fixed31_32_u2d19(data->ratios.horz) << 5;
+ dc_fixpt_u2d19(data->ratios.horz) << 5;
inits->v_int_scale_ratio_luma =
- dal_fixed31_32_u2d19(data->ratios.vert) << 5;
+ dc_fixpt_u2d19(data->ratios.vert) << 5;
inits->h_int_scale_ratio_chroma =
- dal_fixed31_32_u2d19(data->ratios.horz_c) << 5;
+ dc_fixpt_u2d19(data->ratios.horz_c) << 5;
inits->v_int_scale_ratio_chroma =
- dal_fixed31_32_u2d19(data->ratios.vert_c) << 5;
+ dc_fixpt_u2d19(data->ratios.vert_c) << 5;
inits->h_init_luma.integer = 1;
inits->v_init_luma.integer = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
index cd1e3f72c44e..00c0a1ef15eb 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
@@ -430,7 +430,7 @@ static struct stream_encoder *dce112_stream_encoder_create(
if (!enc110)
return NULL;
-
+
dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id,
&stream_enc_regs[eng_id],
&se_shift, &se_mask);
@@ -867,38 +867,6 @@ enum dc_status dce112_add_stream_to_ctx(
return result;
}
-enum dc_status dce112_validate_guaranteed(
- struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_state *context)
-{
- enum dc_status result = DC_ERROR_UNEXPECTED;
-
- context->streams[0] = stream;
- dc_stream_retain(context->streams[0]);
- context->stream_count++;
-
- result = resource_map_pool_resources(dc, context, stream);
-
- if (result == DC_OK)
- result = resource_map_phy_clock_resources(dc, context, stream);
-
- if (result == DC_OK)
- result = build_mapped_resource(dc, context, stream);
-
- if (result == DC_OK) {
- validate_guaranteed_copy_streams(
- context, dc->caps.max_streams);
- result = resource_build_scaling_params_for_context(dc, context);
- }
-
- if (result == DC_OK)
- if (!dce112_validate_bandwidth(dc, context))
- result = DC_FAIL_BANDWIDTH_VALIDATE;
-
- return result;
-}
-
enum dc_status dce112_validate_global(
struct dc *dc,
struct dc_state *context)
@@ -921,7 +889,6 @@ static void dce112_destroy_resource_pool(struct resource_pool **pool)
static const struct resource_funcs dce112_res_pool_funcs = {
.destroy = dce112_destroy_resource_pool,
.link_enc_create = dce112_link_encoder_create,
- .validate_guaranteed = dce112_validate_guaranteed,
.validate_bandwidth = dce112_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce112_add_stream_to_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h
index d5c19d34eb0a..95a403396219 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h
@@ -42,11 +42,6 @@ enum dc_status dce112_validate_with_context(
struct dc_state *context,
struct dc_state *old_context);
-enum dc_status dce112_validate_guaranteed(
- struct dc *dc,
- struct dc_stream_state *dc_stream,
- struct dc_state *context);
-
bool dce112_validate_bandwidth(
struct dc *dc,
struct dc_state *context);
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
index 4659a4bfabaa..2d58daccc005 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
@@ -652,7 +652,7 @@ static struct mem_input *dce120_mem_input_create(
return NULL;
}
- dce112_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks);
+ dce120_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks);
return &dce_mi->base;
}
@@ -684,7 +684,6 @@ static void dce120_destroy_resource_pool(struct resource_pool **pool)
static const struct resource_funcs dce120_res_pool_funcs = {
.destroy = dce120_destroy_resource_pool,
.link_enc_create = dce120_link_encoder_create,
- .validate_guaranteed = dce112_validate_guaranteed,
.validate_bandwidth = dce112_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce112_add_stream_to_ctx
@@ -815,14 +814,25 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
dm_pp_notify_wm_clock_changes(dc->ctx, &clk_ranges);
}
+static uint32_t read_pipe_fuses(struct dc_context *ctx)
+{
+ uint32_t value = dm_read_reg_soc15(ctx, mmCC_DC_PIPE_DIS, 0);
+ /* VG20 support max 6 pipes */
+ value = value & 0x3f;
+ return value;
+}
+
static bool construct(
uint8_t num_virtual_links,
struct dc *dc,
struct dce110_resource_pool *pool)
{
unsigned int i;
+ int j;
struct dc_context *ctx = dc->ctx;
struct irq_service_init_data irq_init_data;
+ bool harvest_enabled = ASICREV_IS_VEGA20_P(ctx->asic_id.hw_internal_rev);
+ uint32_t pipe_fuses;
ctx->dc_bios->regs = &bios_regs;
@@ -916,28 +926,41 @@ static bool construct(
if (!pool->base.irqs)
goto irqs_create_fail;
+ /* retrieve valid pipe fuses */
+ if (harvest_enabled)
+ pipe_fuses = read_pipe_fuses(ctx);
+
+ /* index to valid pipe resource */
+ j = 0;
for (i = 0; i < pool->base.pipe_count; i++) {
- pool->base.timing_generators[i] =
+ if (harvest_enabled) {
+ if ((pipe_fuses & (1 << i)) != 0) {
+ dm_error("DC: skip invalid pipe %d!\n", i);
+ continue;
+ }
+ }
+
+ pool->base.timing_generators[j] =
dce120_timing_generator_create(
ctx,
i,
&dce120_tg_offsets[i]);
- if (pool->base.timing_generators[i] == NULL) {
+ if (pool->base.timing_generators[j] == NULL) {
BREAK_TO_DEBUGGER();
dm_error("DC: failed to create tg!\n");
goto controller_create_fail;
}
- pool->base.mis[i] = dce120_mem_input_create(ctx, i);
+ pool->base.mis[j] = dce120_mem_input_create(ctx, i);
- if (pool->base.mis[i] == NULL) {
+ if (pool->base.mis[j] == NULL) {
BREAK_TO_DEBUGGER();
dm_error(
"DC: failed to create memory input!\n");
goto controller_create_fail;
}
- pool->base.ipps[i] = dce120_ipp_create(ctx, i);
+ pool->base.ipps[j] = dce120_ipp_create(ctx, i);
if (pool->base.ipps[i] == NULL) {
BREAK_TO_DEBUGGER();
dm_error(
@@ -945,7 +968,7 @@ static bool construct(
goto controller_create_fail;
}
- pool->base.transforms[i] = dce120_transform_create(ctx, i);
+ pool->base.transforms[j] = dce120_transform_create(ctx, i);
if (pool->base.transforms[i] == NULL) {
BREAK_TO_DEBUGGER();
dm_error(
@@ -953,16 +976,23 @@ static bool construct(
goto res_create_fail;
}
- pool->base.opps[i] = dce120_opp_create(
+ pool->base.opps[j] = dce120_opp_create(
ctx,
i);
- if (pool->base.opps[i] == NULL) {
+ if (pool->base.opps[j] == NULL) {
BREAK_TO_DEBUGGER();
dm_error(
"DC: failed to create output pixel processor!\n");
}
+
+ /* check next valid pipe */
+ j++;
}
+ /* valid pipe num */
+ pool->base.pipe_count = j;
+ pool->base.timing_generator_count = j;
+
if (!resource_construct(num_virtual_links, dc, &pool->base,
&res_create_funcs))
goto res_create_fail;
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
index 7bee78172d85..2ea490f8482e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
@@ -570,12 +570,6 @@ void dce120_timing_generator_set_drr(
0x180);
} else {
- CRTC_REG_UPDATE(
- CRTC0_CRTC_V_TOTAL_MIN,
- CRTC_V_TOTAL_MIN, 0);
- CRTC_REG_UPDATE(
- CRTC0_CRTC_V_TOTAL_MAX,
- CRTC_V_TOTAL_MAX, 0);
CRTC_REG_SET_N(CRTC0_CRTC_V_TOTAL_CONTROL, 5,
FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_V_TOTAL_MIN_SEL), 0,
FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_V_TOTAL_MAX_SEL), 0,
@@ -583,6 +577,12 @@ void dce120_timing_generator_set_drr(
FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_FORCE_LOCK_TO_MASTER_VSYNC), 0,
FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_SET_V_TOTAL_MIN_MASK), 0);
CRTC_REG_UPDATE(
+ CRTC0_CRTC_V_TOTAL_MIN,
+ CRTC_V_TOTAL_MIN, 0);
+ CRTC_REG_UPDATE(
+ CRTC0_CRTC_V_TOTAL_MAX,
+ CRTC_V_TOTAL_MAX, 0);
+ CRTC_REG_UPDATE(
CRTC0_CRTC_STATIC_SCREEN_CONTROL,
CRTC_STATIC_SCREEN_EVENT_MASK,
0);
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index 5d854a37a978..48a068964722 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -691,23 +691,6 @@ static void destruct(struct dce110_resource_pool *pool)
}
}
-static enum dc_status build_mapped_resource(
- const struct dc *dc,
- struct dc_state *context,
- struct dc_stream_state *stream)
-{
- struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream);
-
- if (!pipe_ctx)
- return DC_ERROR_UNEXPECTED;
-
- dce110_resource_build_pipe_hw_param(pipe_ctx);
-
- resource_build_info_frame(pipe_ctx);
-
- return DC_OK;
-}
-
bool dce80_validate_bandwidth(
struct dc *dc,
struct dc_state *context)
@@ -749,37 +732,6 @@ enum dc_status dce80_validate_global(
return DC_OK;
}
-enum dc_status dce80_validate_guaranteed(
- struct dc *dc,
- struct dc_stream_state *dc_stream,
- struct dc_state *context)
-{
- enum dc_status result = DC_ERROR_UNEXPECTED;
-
- context->streams[0] = dc_stream;
- dc_stream_retain(context->streams[0]);
- context->stream_count++;
-
- result = resource_map_pool_resources(dc, context, dc_stream);
-
- if (result == DC_OK)
- result = resource_map_clock_resources(dc, context, dc_stream);
-
- if (result == DC_OK)
- result = build_mapped_resource(dc, context, dc_stream);
-
- if (result == DC_OK) {
- validate_guaranteed_copy_streams(
- context, dc->caps.max_streams);
- result = resource_build_scaling_params_for_context(dc, context);
- }
-
- if (result == DC_OK)
- result = dce80_validate_bandwidth(dc, context);
-
- return result;
-}
-
static void dce80_destroy_resource_pool(struct resource_pool **pool)
{
struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
@@ -792,7 +744,6 @@ static void dce80_destroy_resource_pool(struct resource_pool **pool)
static const struct resource_funcs dce80_res_pool_funcs = {
.destroy = dce80_destroy_resource_pool,
.link_enc_create = dce80_link_encoder_create,
- .validate_guaranteed = dce80_validate_guaranteed,
.validate_bandwidth = dce80_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce100_add_stream_to_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 5469bdfe19f3..84f52c63d95c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -26,7 +26,7 @@ DCN10 = dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \
dcn10_dpp.o dcn10_opp.o dcn10_optc.o \
dcn10_hubp.o dcn10_mpc.o \
dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \
- dcn10_hubbub.o
+ dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o
AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
index 881a1bff94d2..5d95a997fd9f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
@@ -169,7 +169,7 @@ bool cm_helper_convert_to_custom_float(
}
if (fixpoint == true)
- arr_points[1].custom_float_y = dal_fixed31_32_clamp_u0d14(arr_points[1].y);
+ arr_points[1].custom_float_y = dc_fixpt_clamp_u0d14(arr_points[1].y);
else if (!convert_to_custom_float_format(arr_points[1].y, &fmt,
&arr_points[1].custom_float_y)) {
BREAK_TO_DEBUGGER();
@@ -327,19 +327,19 @@ bool cm_helper_translate_curve_to_hw_format(
rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
- arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
- dal_fixed31_32_from_int(region_start));
- arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
- dal_fixed31_32_from_int(region_end));
+ arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+ dc_fixpt_from_int(region_start));
+ arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+ dc_fixpt_from_int(region_end));
y_r = rgb_resulted[0].red;
y_g = rgb_resulted[0].green;
y_b = rgb_resulted[0].blue;
- y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b));
+ y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b));
arr_points[0].y = y1_min;
- arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y, arr_points[0].x);
+ arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x);
y_r = rgb_resulted[hw_points - 1].red;
y_g = rgb_resulted[hw_points - 1].green;
y_b = rgb_resulted[hw_points - 1].blue;
@@ -347,35 +347,35 @@ bool cm_helper_translate_curve_to_hw_format(
/* see comment above, m_arrPoints[1].y should be the Y value for the
* region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
*/
- y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b));
+ y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b));
arr_points[1].y = y3_max;
- arr_points[1].slope = dal_fixed31_32_zero;
+ arr_points[1].slope = dc_fixpt_zero;
if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
/* for PQ, we want to have a straight line from last HW X point,
* and the slope to be such that we hit 1.0 at 10000 nits.
*/
const struct fixed31_32 end_value =
- dal_fixed31_32_from_int(125);
+ dc_fixpt_from_int(125);
- arr_points[1].slope = dal_fixed31_32_div(
- dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y),
- dal_fixed31_32_sub(end_value, arr_points[1].x));
+ arr_points[1].slope = dc_fixpt_div(
+ dc_fixpt_sub(dc_fixpt_one, arr_points[1].y),
+ dc_fixpt_sub(end_value, arr_points[1].x));
}
lut_params->hw_points_num = hw_points;
- i = 1;
- for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) {
+ k = 0;
+ for (i = 1; i < MAX_REGIONS_NUMBER; i++) {
if (seg_distr[k] != -1) {
lut_params->arr_curve_points[k].segments_num =
seg_distr[k];
lut_params->arr_curve_points[i].offset =
lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]);
}
- i++;
+ k++;
}
if (seg_distr[k] != -1)
@@ -386,24 +386,24 @@ bool cm_helper_translate_curve_to_hw_format(
i = 1;
while (i != hw_points + 1) {
- if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red))
+ if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
rgb_plus_1->red = rgb->red;
- if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green))
+ if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
rgb_plus_1->green = rgb->green;
- if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue))
+ if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
rgb_plus_1->blue = rgb->blue;
- rgb->delta_red = dal_fixed31_32_sub(rgb_plus_1->red, rgb->red);
- rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green);
- rgb->delta_blue = dal_fixed31_32_sub(rgb_plus_1->blue, rgb->blue);
+ rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red);
+ rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
+ rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
if (fixpoint == true) {
- rgb->delta_red_reg = dal_fixed31_32_clamp_u0d10(rgb->delta_red);
- rgb->delta_green_reg = dal_fixed31_32_clamp_u0d10(rgb->delta_green);
- rgb->delta_blue_reg = dal_fixed31_32_clamp_u0d10(rgb->delta_blue);
- rgb->red_reg = dal_fixed31_32_clamp_u0d14(rgb->red);
- rgb->green_reg = dal_fixed31_32_clamp_u0d14(rgb->green);
- rgb->blue_reg = dal_fixed31_32_clamp_u0d14(rgb->blue);
+ rgb->delta_red_reg = dc_fixpt_clamp_u0d10(rgb->delta_red);
+ rgb->delta_green_reg = dc_fixpt_clamp_u0d10(rgb->delta_green);
+ rgb->delta_blue_reg = dc_fixpt_clamp_u0d10(rgb->delta_blue);
+ rgb->red_reg = dc_fixpt_clamp_u0d14(rgb->red);
+ rgb->green_reg = dc_fixpt_clamp_u0d14(rgb->green);
+ rgb->blue_reg = dc_fixpt_clamp_u0d14(rgb->blue);
}
++rgb_plus_1;
@@ -489,19 +489,19 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
- arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
- dal_fixed31_32_from_int(region_start));
- arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
- dal_fixed31_32_from_int(region_end));
+ arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+ dc_fixpt_from_int(region_start));
+ arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+ dc_fixpt_from_int(region_end));
y_r = rgb_resulted[0].red;
y_g = rgb_resulted[0].green;
y_b = rgb_resulted[0].blue;
- y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b));
+ y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b));
arr_points[0].y = y1_min;
- arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y, arr_points[0].x);
+ arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x);
y_r = rgb_resulted[hw_points - 1].red;
y_g = rgb_resulted[hw_points - 1].green;
y_b = rgb_resulted[hw_points - 1].blue;
@@ -509,35 +509,35 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
/* see comment above, m_arrPoints[1].y should be the Y value for the
* region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
*/
- y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b));
+ y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b));
arr_points[1].y = y3_max;
- arr_points[1].slope = dal_fixed31_32_zero;
+ arr_points[1].slope = dc_fixpt_zero;
if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
/* for PQ, we want to have a straight line from last HW X point,
* and the slope to be such that we hit 1.0 at 10000 nits.
*/
const struct fixed31_32 end_value =
- dal_fixed31_32_from_int(125);
+ dc_fixpt_from_int(125);
- arr_points[1].slope = dal_fixed31_32_div(
- dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y),
- dal_fixed31_32_sub(end_value, arr_points[1].x));
+ arr_points[1].slope = dc_fixpt_div(
+ dc_fixpt_sub(dc_fixpt_one, arr_points[1].y),
+ dc_fixpt_sub(end_value, arr_points[1].x));
}
lut_params->hw_points_num = hw_points;
- i = 1;
- for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) {
+ k = 0;
+ for (i = 1; i < MAX_REGIONS_NUMBER; i++) {
if (seg_distr[k] != -1) {
lut_params->arr_curve_points[k].segments_num =
seg_distr[k];
lut_params->arr_curve_points[i].offset =
lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]);
}
- i++;
+ k++;
}
if (seg_distr[k] != -1)
@@ -548,16 +548,16 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
i = 1;
while (i != hw_points + 1) {
- if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red))
+ if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
rgb_plus_1->red = rgb->red;
- if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green))
+ if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
rgb_plus_1->green = rgb->green;
- if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue))
+ if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
rgb_plus_1->blue = rgb->blue;
- rgb->delta_red = dal_fixed31_32_sub(rgb_plus_1->red, rgb->red);
- rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green);
- rgb->delta_blue = dal_fixed31_32_sub(rgb_plus_1->blue, rgb->blue);
+ rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red);
+ rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
+ rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
++rgb_plus_1;
++rgb;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index e305c28c98de..46a35c7f01df 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -98,6 +98,30 @@ enum gamut_remap_select {
GAMUT_REMAP_COMB_COEFF
};
+void dpp_read_state(struct dpp *dpp_base,
+ struct dcn_dpp_state *s)
+{
+ struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
+
+ REG_GET(CM_IGAM_CONTROL,
+ CM_IGAM_LUT_MODE, &s->igam_lut_mode);
+ REG_GET(CM_IGAM_CONTROL,
+ CM_IGAM_INPUT_FORMAT, &s->igam_input_format);
+ REG_GET(CM_DGAM_CONTROL,
+ CM_DGAM_LUT_MODE, &s->dgam_lut_mode);
+ REG_GET(CM_RGAM_CONTROL,
+ CM_RGAM_LUT_MODE, &s->rgam_lut_mode);
+ REG_GET(CM_GAMUT_REMAP_CONTROL,
+ CM_GAMUT_REMAP_MODE, &s->gamut_remap_mode);
+
+ s->gamut_remap_c11_c12 = REG_READ(CM_GAMUT_REMAP_C11_C12);
+ s->gamut_remap_c13_c14 = REG_READ(CM_GAMUT_REMAP_C13_C14);
+ s->gamut_remap_c21_c22 = REG_READ(CM_GAMUT_REMAP_C21_C22);
+ s->gamut_remap_c23_c24 = REG_READ(CM_GAMUT_REMAP_C23_C24);
+ s->gamut_remap_c31_c32 = REG_READ(CM_GAMUT_REMAP_C31_C32);
+ s->gamut_remap_c33_c34 = REG_READ(CM_GAMUT_REMAP_C33_C34);
+}
+
/* Program gamut remap in bypass mode */
void dpp_set_gamut_remap_bypass(struct dcn10_dpp *dpp)
{
@@ -106,7 +130,7 @@ void dpp_set_gamut_remap_bypass(struct dcn10_dpp *dpp)
/* Gamut remap in bypass */
}
-#define IDENTITY_RATIO(ratio) (dal_fixed31_32_u2d19(ratio) == (1 << 19))
+#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19))
bool dpp_get_optimal_number_of_taps(
@@ -121,6 +145,18 @@ bool dpp_get_optimal_number_of_taps(
else
pixel_width = scl_data->viewport.width;
+ /* Some ASICs does not support FP16 scaling, so we reject modes require this*/
+ if (scl_data->viewport.width != scl_data->h_active &&
+ scl_data->viewport.height != scl_data->v_active &&
+ dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT &&
+ scl_data->format == PIXEL_FORMAT_FP16)
+ return false;
+
+ if (scl_data->viewport.width > scl_data->h_active &&
+ dpp->ctx->dc->debug.max_downscale_src_width != 0 &&
+ scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width)
+ return false;
+
/* TODO: add lb check */
/* No support for programming ratio of 4, drop to 3.99999.. */
@@ -257,7 +293,7 @@ void dpp1_cnv_setup (
struct dpp *dpp_base,
enum surface_pixel_format format,
enum expansion_mode mode,
- struct csc_transform input_csc_color_matrix,
+ struct dc_csc_transform input_csc_color_matrix,
enum dc_color_space input_color_space)
{
uint32_t pixel_format;
@@ -416,7 +452,7 @@ void dpp1_set_cursor_position(
if (src_x_offset >= (int)param->viewport_width)
cur_en = 0; /* not visible beyond right edge*/
- if (src_x_offset + (int)width < 0)
+ if (src_x_offset + (int)width <= 0)
cur_en = 0; /* not visible beyond left edge*/
REG_UPDATE(CURSOR0_CONTROL,
@@ -443,6 +479,7 @@ void dpp1_dppclk_control(
}
static const struct dpp_funcs dcn10_dpp_funcs = {
+ .dpp_read_state = dpp_read_state,
.dpp_reset = dpp_reset,
.dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale,
.dpp_get_optimal_number_of_taps = dpp_get_optimal_number_of_taps,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
index 17b062a8f88a..5944a3ba0409 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
@@ -44,6 +44,10 @@
#define TF_REG_LIST_DCN(id) \
SRI(CM_GAMUT_REMAP_CONTROL, CM, id),\
SRI(CM_GAMUT_REMAP_C11_C12, CM, id),\
+ SRI(CM_GAMUT_REMAP_C13_C14, CM, id),\
+ SRI(CM_GAMUT_REMAP_C21_C22, CM, id),\
+ SRI(CM_GAMUT_REMAP_C23_C24, CM, id),\
+ SRI(CM_GAMUT_REMAP_C31_C32, CM, id),\
SRI(CM_GAMUT_REMAP_C33_C34, CM, id),\
SRI(DSCL_EXT_OVERSCAN_LEFT_RIGHT, DSCL, id), \
SRI(DSCL_EXT_OVERSCAN_TOP_BOTTOM, DSCL, id), \
@@ -108,6 +112,8 @@
SRI(CM_DGAM_LUT_DATA, CM, id), \
SRI(CM_CONTROL, CM, id), \
SRI(CM_DGAM_CONTROL, CM, id), \
+ SRI(CM_TEST_DEBUG_INDEX, CM, id), \
+ SRI(CM_TEST_DEBUG_DATA, CM, id), \
SRI(FORMAT_CONTROL, CNVC_CFG, id), \
SRI(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \
SRI(CURSOR0_CONTROL, CNVC_CUR, id), \
@@ -175,6 +181,14 @@
TF_SF(CM0_CM_GAMUT_REMAP_CONTROL, CM_GAMUT_REMAP_MODE, mask_sh),\
TF_SF(CM0_CM_GAMUT_REMAP_C11_C12, CM_GAMUT_REMAP_C11, mask_sh),\
TF_SF(CM0_CM_GAMUT_REMAP_C11_C12, CM_GAMUT_REMAP_C12, mask_sh),\
+ TF_SF(CM0_CM_GAMUT_REMAP_C13_C14, CM_GAMUT_REMAP_C13, mask_sh),\
+ TF_SF(CM0_CM_GAMUT_REMAP_C13_C14, CM_GAMUT_REMAP_C14, mask_sh),\
+ TF_SF(CM0_CM_GAMUT_REMAP_C21_C22, CM_GAMUT_REMAP_C21, mask_sh),\
+ TF_SF(CM0_CM_GAMUT_REMAP_C21_C22, CM_GAMUT_REMAP_C22, mask_sh),\
+ TF_SF(CM0_CM_GAMUT_REMAP_C23_C24, CM_GAMUT_REMAP_C23, mask_sh),\
+ TF_SF(CM0_CM_GAMUT_REMAP_C23_C24, CM_GAMUT_REMAP_C24, mask_sh),\
+ TF_SF(CM0_CM_GAMUT_REMAP_C31_C32, CM_GAMUT_REMAP_C31, mask_sh),\
+ TF_SF(CM0_CM_GAMUT_REMAP_C31_C32, CM_GAMUT_REMAP_C32, mask_sh),\
TF_SF(CM0_CM_GAMUT_REMAP_C33_C34, CM_GAMUT_REMAP_C33, mask_sh),\
TF_SF(CM0_CM_GAMUT_REMAP_C33_C34, CM_GAMUT_REMAP_C34, mask_sh),\
TF_SF(DSCL0_DSCL_EXT_OVERSCAN_LEFT_RIGHT, EXT_OVERSCAN_LEFT, mask_sh),\
@@ -300,6 +314,7 @@
TF_SF(CM0_CM_DGAM_LUT_INDEX, CM_DGAM_LUT_INDEX, mask_sh), \
TF_SF(CM0_CM_DGAM_LUT_DATA, CM_DGAM_LUT_DATA, mask_sh), \
TF_SF(CM0_CM_DGAM_CONTROL, CM_DGAM_LUT_MODE, mask_sh), \
+ TF_SF(CM0_CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_INDEX, mask_sh), \
TF_SF(CNVC_CFG0_FORMAT_CONTROL, CNVC_BYPASS, mask_sh), \
TF2_SF(CNVC_CFG0, FORMAT_CONTROL__ALPHA_EN, mask_sh), \
TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_EXPANSION_MODE, mask_sh), \
@@ -417,6 +432,41 @@
TF_SF(CURSOR0_CURSOR_CONTROL, CURSOR_ENABLE, mask_sh), \
TF_SF(DPP_TOP0_DPP_CONTROL, DPPCLK_RATE_CONTROL, mask_sh)
+/*
+ *
+ DCN1 CM debug status register definition
+
+ register :ID9_CM_STATUS do
+ implement_ref :cm
+ map to: :cmdebugind, at: j
+ width 32
+ disclosure NEVER
+
+ field :ID9_VUPDATE_CFG, [0], R
+ field :ID9_IGAM_LUT_MODE, [2..1], R
+ field :ID9_BNS_BYPASS, [3], R
+ field :ID9_ICSC_MODE, [5..4], R
+ field :ID9_DGAM_LUT_MODE, [8..6], R
+ field :ID9_HDR_BYPASS, [9], R
+ field :ID9_GAMUT_REMAP_MODE, [11..10], R
+ field :ID9_RGAM_LUT_MODE, [14..12], R
+ #1 free bit
+ field :ID9_OCSC_MODE, [18..16], R
+ field :ID9_DENORM_MODE, [21..19], R
+ field :ID9_ROUND_TRUNC_MODE, [25..22], R
+ field :ID9_DITHER_EN, [26], R
+ field :ID9_DITHER_MODE, [28..27], R
+ end
+*/
+
+#define TF_DEBUG_REG_LIST_SH_DCN10 \
+ .CM_TEST_DEBUG_DATA_ID9_ICSC_MODE = 4, \
+ .CM_TEST_DEBUG_DATA_ID9_OCSC_MODE = 16
+
+#define TF_DEBUG_REG_LIST_MASK_DCN10 \
+ .CM_TEST_DEBUG_DATA_ID9_ICSC_MODE = 0x30, \
+ .CM_TEST_DEBUG_DATA_ID9_OCSC_MODE = 0x70000
+
#define TF_REG_FIELD_LIST(type) \
type EXT_OVERSCAN_LEFT; \
type EXT_OVERSCAN_RIGHT; \
@@ -486,6 +536,14 @@
type CM_GAMUT_REMAP_MODE; \
type CM_GAMUT_REMAP_C11; \
type CM_GAMUT_REMAP_C12; \
+ type CM_GAMUT_REMAP_C13; \
+ type CM_GAMUT_REMAP_C14; \
+ type CM_GAMUT_REMAP_C21; \
+ type CM_GAMUT_REMAP_C22; \
+ type CM_GAMUT_REMAP_C23; \
+ type CM_GAMUT_REMAP_C24; \
+ type CM_GAMUT_REMAP_C31; \
+ type CM_GAMUT_REMAP_C32; \
type CM_GAMUT_REMAP_C33; \
type CM_GAMUT_REMAP_C34; \
type CM_COMA_C11; \
@@ -1010,6 +1068,9 @@
type CUR0_EXPANSION_MODE; \
type CUR0_ENABLE; \
type CM_BYPASS; \
+ type CM_TEST_DEBUG_INDEX; \
+ type CM_TEST_DEBUG_DATA_ID9_ICSC_MODE; \
+ type CM_TEST_DEBUG_DATA_ID9_OCSC_MODE;\
type FORMAT_CONTROL__ALPHA_EN; \
type CUR0_COLOR0; \
type CUR0_COLOR1; \
@@ -1054,6 +1115,10 @@ struct dcn_dpp_mask {
uint32_t RECOUT_SIZE; \
uint32_t CM_GAMUT_REMAP_CONTROL; \
uint32_t CM_GAMUT_REMAP_C11_C12; \
+ uint32_t CM_GAMUT_REMAP_C13_C14; \
+ uint32_t CM_GAMUT_REMAP_C21_C22; \
+ uint32_t CM_GAMUT_REMAP_C23_C24; \
+ uint32_t CM_GAMUT_REMAP_C31_C32; \
uint32_t CM_GAMUT_REMAP_C33_C34; \
uint32_t CM_COMA_C11_C12; \
uint32_t CM_COMA_C33_C34; \
@@ -1255,6 +1320,8 @@ struct dcn_dpp_mask {
uint32_t CM_IGAM_LUT_RW_CONTROL; \
uint32_t CM_IGAM_LUT_RW_INDEX; \
uint32_t CM_IGAM_LUT_SEQ_COLOR; \
+ uint32_t CM_TEST_DEBUG_INDEX; \
+ uint32_t CM_TEST_DEBUG_DATA; \
uint32_t FORMAT_CONTROL; \
uint32_t CNVC_SURFACE_PIXEL_FORMAT; \
uint32_t CURSOR_CONTROL; \
@@ -1289,8 +1356,8 @@ struct dcn10_dpp {
enum dcn10_input_csc_select {
INPUT_CSC_SELECT_BYPASS = 0,
- INPUT_CSC_SELECT_ICSC,
- INPUT_CSC_SELECT_COMA
+ INPUT_CSC_SELECT_ICSC = 1,
+ INPUT_CSC_SELECT_COMA = 2
};
void dpp1_set_cursor_attributes(
@@ -1364,6 +1431,9 @@ bool dpp_get_optimal_number_of_taps(
struct scaler_data *scl_data,
const struct scaling_taps *in_taps);
+void dpp_read_state(struct dpp *dpp_base,
+ struct dcn_dpp_state *s);
+
void dpp_reset(struct dpp *dpp_base);
void dpp1_cm_program_regamma_lut(
@@ -1408,7 +1478,7 @@ void dpp1_cnv_setup (
struct dpp *dpp_base,
enum surface_pixel_format format,
enum expansion_mode mode,
- struct csc_transform input_csc_color_matrix,
+ struct dc_csc_transform input_csc_color_matrix,
enum dc_color_space input_color_space);
void dpp1_full_bypass(struct dpp *dpp_base);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
index fb32975e4b67..116977eb24e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
@@ -216,41 +216,55 @@ static void dpp1_cm_program_color_matrix(
struct dcn10_dpp *dpp,
const uint16_t *regval)
{
- uint32_t mode;
+ uint32_t ocsc_mode;
+ uint32_t cur_mode;
struct color_matrices_reg gam_regs;
- REG_GET(CM_OCSC_CONTROL, CM_OCSC_MODE, &mode);
-
if (regval == NULL) {
BREAK_TO_DEBUGGER();
return;
}
- mode = 4;
+
+ /* determine which CSC matrix (ocsc or comb) we are using
+ * currently. select the alternate set to double buffer
+ * the CSC update so CSC is updated on frame boundary
+ */
+ REG_SET(CM_TEST_DEBUG_INDEX, 0,
+ CM_TEST_DEBUG_INDEX, 9);
+
+ REG_GET(CM_TEST_DEBUG_DATA,
+ CM_TEST_DEBUG_DATA_ID9_OCSC_MODE, &cur_mode);
+
+ if (cur_mode != 4)
+ ocsc_mode = 4;
+ else
+ ocsc_mode = 5;
+
+
gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_OCSC_C11;
gam_regs.masks.csc_c11 = dpp->tf_mask->CM_OCSC_C11;
gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_OCSC_C12;
gam_regs.masks.csc_c12 = dpp->tf_mask->CM_OCSC_C12;
- if (mode == 4) {
+ if (ocsc_mode == 4) {
gam_regs.csc_c11_c12 = REG(CM_OCSC_C11_C12);
gam_regs.csc_c33_c34 = REG(CM_OCSC_C33_C34);
- cm_helper_program_color_matrices(
- dpp->base.ctx,
- regval,
- &gam_regs);
-
} else {
gam_regs.csc_c11_c12 = REG(CM_COMB_C11_C12);
gam_regs.csc_c33_c34 = REG(CM_COMB_C33_C34);
- cm_helper_program_color_matrices(
- dpp->base.ctx,
- regval,
- &gam_regs);
}
+
+ cm_helper_program_color_matrices(
+ dpp->base.ctx,
+ regval,
+ &gam_regs);
+
+ REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode);
+
}
void dpp1_cm_set_output_csc_default(
@@ -260,15 +274,14 @@ void dpp1_cm_set_output_csc_default(
struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
const uint16_t *regval = NULL;
int arr_size;
- uint32_t ocsc_mode = 4;
regval = find_color_matrix(colorspace, &arr_size);
if (regval == NULL) {
BREAK_TO_DEBUGGER();
return;
}
+
dpp1_cm_program_color_matrix(dpp, regval);
- REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode);
}
static void dpp1_cm_get_reg_field(
@@ -329,9 +342,8 @@ void dpp1_cm_set_output_csc_adjustment(
const uint16_t *regval)
{
struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
- uint32_t ocsc_mode = 4;
+
dpp1_cm_program_color_matrix(dpp, regval);
- REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode);
}
void dpp1_cm_power_on_regamma_lut(struct dpp *dpp_base,
@@ -437,17 +449,18 @@ void dpp1_cm_program_regamma_lutb_settings(
void dpp1_program_input_csc(
struct dpp *dpp_base,
enum dc_color_space color_space,
- enum dcn10_input_csc_select select,
+ enum dcn10_input_csc_select input_select,
const struct out_csc_color_matrix *tbl_entry)
{
struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
int i;
int arr_size = sizeof(dcn10_input_csc_matrix)/sizeof(struct dcn10_input_csc_matrix);
const uint16_t *regval = NULL;
- uint32_t selection = 1;
+ uint32_t cur_select = 0;
+ enum dcn10_input_csc_select select;
struct color_matrices_reg gam_regs;
- if (select == INPUT_CSC_SELECT_BYPASS) {
+ if (input_select == INPUT_CSC_SELECT_BYPASS) {
REG_SET(CM_ICSC_CONTROL, 0, CM_ICSC_MODE, 0);
return;
}
@@ -467,36 +480,45 @@ void dpp1_program_input_csc(
regval = tbl_entry->regval;
}
- if (select == INPUT_CSC_SELECT_COMA)
- selection = 2;
- REG_SET(CM_ICSC_CONTROL, 0,
- CM_ICSC_MODE, selection);
+ /* determine which CSC matrix (icsc or coma) we are using
+ * currently. select the alternate set to double buffer
+ * the CSC update so CSC is updated on frame boundary
+ */
+ REG_SET(CM_TEST_DEBUG_INDEX, 0,
+ CM_TEST_DEBUG_INDEX, 9);
+
+ REG_GET(CM_TEST_DEBUG_DATA,
+ CM_TEST_DEBUG_DATA_ID9_ICSC_MODE, &cur_select);
+
+ if (cur_select != INPUT_CSC_SELECT_ICSC)
+ select = INPUT_CSC_SELECT_ICSC;
+ else
+ select = INPUT_CSC_SELECT_COMA;
gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_ICSC_C11;
gam_regs.masks.csc_c11 = dpp->tf_mask->CM_ICSC_C11;
gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_ICSC_C12;
gam_regs.masks.csc_c12 = dpp->tf_mask->CM_ICSC_C12;
-
if (select == INPUT_CSC_SELECT_ICSC) {
gam_regs.csc_c11_c12 = REG(CM_ICSC_C11_C12);
gam_regs.csc_c33_c34 = REG(CM_ICSC_C33_C34);
- cm_helper_program_color_matrices(
- dpp->base.ctx,
- regval,
- &gam_regs);
} else {
gam_regs.csc_c11_c12 = REG(CM_COMA_C11_C12);
gam_regs.csc_c33_c34 = REG(CM_COMA_C33_C34);
- cm_helper_program_color_matrices(
- dpp->base.ctx,
- regval,
- &gam_regs);
}
+
+ cm_helper_program_color_matrices(
+ dpp->base.ctx,
+ regval,
+ &gam_regs);
+
+ REG_SET(CM_ICSC_CONTROL, 0,
+ CM_ICSC_MODE, select);
}
//keep here for now, decide multi dce support later
@@ -789,13 +811,13 @@ void dpp1_program_input_lut(
REG_UPDATE(CM_IGAM_LUT_RW_INDEX, CM_IGAM_LUT_RW_INDEX, 0);
for (i = 0; i < gamma->num_entries; i++) {
REG_SET(CM_IGAM_LUT_SEQ_COLOR, 0, CM_IGAM_LUT_SEQ_COLOR,
- dal_fixed31_32_round(
+ dc_fixpt_round(
gamma->entries.red[i]));
REG_SET(CM_IGAM_LUT_SEQ_COLOR, 0, CM_IGAM_LUT_SEQ_COLOR,
- dal_fixed31_32_round(
+ dc_fixpt_round(
gamma->entries.green[i]));
REG_SET(CM_IGAM_LUT_SEQ_COLOR, 0, CM_IGAM_LUT_SEQ_COLOR,
- dal_fixed31_32_round(
+ dc_fixpt_round(
gamma->entries.blue[i]));
}
// Power off LUT memory
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
index 3eb824debf43..4ddd6273d5a5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
@@ -169,7 +169,7 @@ static enum dscl_mode_sel dpp1_dscl_get_dscl_mode(
const struct scaler_data *data,
bool dbg_always_scale)
{
- const long long one = dal_fixed31_32_one.value;
+ const long long one = dc_fixpt_one.value;
if (dpp_base->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) {
/* DSCL is processing data in fixed format */
@@ -464,8 +464,8 @@ static enum lb_memory_config dpp1_dscl_find_lb_memory_config(struct dcn10_dpp *d
int num_part_y, num_part_c;
int vtaps = scl_data->taps.v_taps;
int vtaps_c = scl_data->taps.v_taps_c;
- int ceil_vratio = dal_fixed31_32_ceil(scl_data->ratios.vert);
- int ceil_vratio_c = dal_fixed31_32_ceil(scl_data->ratios.vert_c);
+ int ceil_vratio = dc_fixpt_ceil(scl_data->ratios.vert);
+ int ceil_vratio_c = dc_fixpt_ceil(scl_data->ratios.vert_c);
enum lb_memory_config mem_cfg = LB_MEMORY_CONFIG_0;
if (dpp->base.ctx->dc->debug.use_max_lb)
@@ -565,52 +565,52 @@ static void dpp1_dscl_set_manual_ratio_init(
uint32_t init_int = 0;
REG_SET(SCL_HORZ_FILTER_SCALE_RATIO, 0,
- SCL_H_SCALE_RATIO, dal_fixed31_32_u2d19(data->ratios.horz) << 5);
+ SCL_H_SCALE_RATIO, dc_fixpt_u2d19(data->ratios.horz) << 5);
REG_SET(SCL_VERT_FILTER_SCALE_RATIO, 0,
- SCL_V_SCALE_RATIO, dal_fixed31_32_u2d19(data->ratios.vert) << 5);
+ SCL_V_SCALE_RATIO, dc_fixpt_u2d19(data->ratios.vert) << 5);
REG_SET(SCL_HORZ_FILTER_SCALE_RATIO_C, 0,
- SCL_H_SCALE_RATIO_C, dal_fixed31_32_u2d19(data->ratios.horz_c) << 5);
+ SCL_H_SCALE_RATIO_C, dc_fixpt_u2d19(data->ratios.horz_c) << 5);
REG_SET(SCL_VERT_FILTER_SCALE_RATIO_C, 0,
- SCL_V_SCALE_RATIO_C, dal_fixed31_32_u2d19(data->ratios.vert_c) << 5);
+ SCL_V_SCALE_RATIO_C, dc_fixpt_u2d19(data->ratios.vert_c) << 5);
/*
* 0.24 format for fraction, first five bits zeroed
*/
- init_frac = dal_fixed31_32_u0d19(data->inits.h) << 5;
- init_int = dal_fixed31_32_floor(data->inits.h);
+ init_frac = dc_fixpt_u0d19(data->inits.h) << 5;
+ init_int = dc_fixpt_floor(data->inits.h);
REG_SET_2(SCL_HORZ_FILTER_INIT, 0,
SCL_H_INIT_FRAC, init_frac,
SCL_H_INIT_INT, init_int);
- init_frac = dal_fixed31_32_u0d19(data->inits.h_c) << 5;
- init_int = dal_fixed31_32_floor(data->inits.h_c);
+ init_frac = dc_fixpt_u0d19(data->inits.h_c) << 5;
+ init_int = dc_fixpt_floor(data->inits.h_c);
REG_SET_2(SCL_HORZ_FILTER_INIT_C, 0,
SCL_H_INIT_FRAC_C, init_frac,
SCL_H_INIT_INT_C, init_int);
- init_frac = dal_fixed31_32_u0d19(data->inits.v) << 5;
- init_int = dal_fixed31_32_floor(data->inits.v);
+ init_frac = dc_fixpt_u0d19(data->inits.v) << 5;
+ init_int = dc_fixpt_floor(data->inits.v);
REG_SET_2(SCL_VERT_FILTER_INIT, 0,
SCL_V_INIT_FRAC, init_frac,
SCL_V_INIT_INT, init_int);
- init_frac = dal_fixed31_32_u0d19(data->inits.v_bot) << 5;
- init_int = dal_fixed31_32_floor(data->inits.v_bot);
+ init_frac = dc_fixpt_u0d19(data->inits.v_bot) << 5;
+ init_int = dc_fixpt_floor(data->inits.v_bot);
REG_SET_2(SCL_VERT_FILTER_INIT_BOT, 0,
SCL_V_INIT_FRAC_BOT, init_frac,
SCL_V_INIT_INT_BOT, init_int);
- init_frac = dal_fixed31_32_u0d19(data->inits.v_c) << 5;
- init_int = dal_fixed31_32_floor(data->inits.v_c);
+ init_frac = dc_fixpt_u0d19(data->inits.v_c) << 5;
+ init_int = dc_fixpt_floor(data->inits.v_c);
REG_SET_2(SCL_VERT_FILTER_INIT_C, 0,
SCL_V_INIT_FRAC_C, init_frac,
SCL_V_INIT_INT_C, init_int);
- init_frac = dal_fixed31_32_u0d19(data->inits.v_c_bot) << 5;
- init_int = dal_fixed31_32_floor(data->inits.v_c_bot);
+ init_frac = dc_fixpt_u0d19(data->inits.v_c_bot) << 5;
+ init_int = dc_fixpt_floor(data->inits.v_c_bot);
REG_SET_2(SCL_VERT_FILTER_INIT_BOT_C, 0,
SCL_V_INIT_FRAC_BOT_C, init_frac,
SCL_V_INIT_INT_BOT_C, init_int);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index 738f67ffd1b4..943143efbb82 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -476,8 +476,235 @@ void hubbub1_toggle_watermark_change_req(struct hubbub *hubbub)
DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, watermark_change_req);
}
+void hubbub1_soft_reset(struct hubbub *hubbub, bool reset)
+{
+ uint32_t reset_en = reset ? 1 : 0;
+
+ REG_UPDATE(DCHUBBUB_SOFT_RESET,
+ DCHUBBUB_GLOBAL_SOFT_RESET, reset_en);
+}
+
+static bool hubbub1_dcc_support_swizzle(
+ enum swizzle_mode_values swizzle,
+ unsigned int bytes_per_element,
+ enum segment_order *segment_order_horz,
+ enum segment_order *segment_order_vert)
+{
+ bool standard_swizzle = false;
+ bool display_swizzle = false;
+
+ switch (swizzle) {
+ case DC_SW_4KB_S:
+ case DC_SW_64KB_S:
+ case DC_SW_VAR_S:
+ case DC_SW_4KB_S_X:
+ case DC_SW_64KB_S_X:
+ case DC_SW_VAR_S_X:
+ standard_swizzle = true;
+ break;
+ case DC_SW_4KB_D:
+ case DC_SW_64KB_D:
+ case DC_SW_VAR_D:
+ case DC_SW_4KB_D_X:
+ case DC_SW_64KB_D_X:
+ case DC_SW_VAR_D_X:
+ display_swizzle = true;
+ break;
+ default:
+ break;
+ }
+
+ if (bytes_per_element == 1 && standard_swizzle) {
+ *segment_order_horz = segment_order__contiguous;
+ *segment_order_vert = segment_order__na;
+ return true;
+ }
+ if (bytes_per_element == 2 && standard_swizzle) {
+ *segment_order_horz = segment_order__non_contiguous;
+ *segment_order_vert = segment_order__contiguous;
+ return true;
+ }
+ if (bytes_per_element == 4 && standard_swizzle) {
+ *segment_order_horz = segment_order__non_contiguous;
+ *segment_order_vert = segment_order__contiguous;
+ return true;
+ }
+ if (bytes_per_element == 8 && standard_swizzle) {
+ *segment_order_horz = segment_order__na;
+ *segment_order_vert = segment_order__contiguous;
+ return true;
+ }
+ if (bytes_per_element == 8 && display_swizzle) {
+ *segment_order_horz = segment_order__contiguous;
+ *segment_order_vert = segment_order__non_contiguous;
+ return true;
+ }
+
+ return false;
+}
+
+static bool hubbub1_dcc_support_pixel_format(
+ enum surface_pixel_format format,
+ unsigned int *bytes_per_element)
+{
+ /* DML: get_bytes_per_element */
+ switch (format) {
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+ *bytes_per_element = 2;
+ return true;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
+ *bytes_per_element = 4;
+ return true;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+ *bytes_per_element = 8;
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void hubbub1_get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height,
+ unsigned int bytes_per_element)
+{
+ /* copied from DML. might want to refactor DML to leverage from DML */
+ /* DML : get_blk256_size */
+ if (bytes_per_element == 1) {
+ *blk256_width = 16;
+ *blk256_height = 16;
+ } else if (bytes_per_element == 2) {
+ *blk256_width = 16;
+ *blk256_height = 8;
+ } else if (bytes_per_element == 4) {
+ *blk256_width = 8;
+ *blk256_height = 8;
+ } else if (bytes_per_element == 8) {
+ *blk256_width = 8;
+ *blk256_height = 4;
+ }
+}
+
+static void hubbub1_det_request_size(
+ unsigned int height,
+ unsigned int width,
+ unsigned int bpe,
+ bool *req128_horz_wc,
+ bool *req128_vert_wc)
+{
+ unsigned int detile_buf_size = 164 * 1024; /* 164KB for DCN1.0 */
+
+ unsigned int blk256_height = 0;
+ unsigned int blk256_width = 0;
+ unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc;
+
+ hubbub1_get_blk256_size(&blk256_width, &blk256_height, bpe);
+
+ swath_bytes_horz_wc = height * blk256_height * bpe;
+ swath_bytes_vert_wc = width * blk256_width * bpe;
+
+ *req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ?
+ false : /* full 256B request */
+ true; /* half 128b request */
+
+ *req128_vert_wc = (2 * swath_bytes_vert_wc <= detile_buf_size) ?
+ false : /* full 256B request */
+ true; /* half 128b request */
+}
+
+static bool hubbub1_get_dcc_compression_cap(struct hubbub *hubbub,
+ const struct dc_dcc_surface_param *input,
+ struct dc_surface_dcc_cap *output)
+{
+ struct dc *dc = hubbub->ctx->dc;
+ /* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */
+ enum dcc_control dcc_control;
+ unsigned int bpe;
+ enum segment_order segment_order_horz, segment_order_vert;
+ bool req128_horz_wc, req128_vert_wc;
+
+ memset(output, 0, sizeof(*output));
+
+ if (dc->debug.disable_dcc == DCC_DISABLE)
+ return false;
+
+ if (!hubbub->funcs->dcc_support_pixel_format(input->format, &bpe))
+ return false;
+
+ if (!hubbub->funcs->dcc_support_swizzle(input->swizzle_mode, bpe,
+ &segment_order_horz, &segment_order_vert))
+ return false;
+
+ hubbub1_det_request_size(input->surface_size.height, input->surface_size.width,
+ bpe, &req128_horz_wc, &req128_vert_wc);
+
+ if (!req128_horz_wc && !req128_vert_wc) {
+ dcc_control = dcc_control__256_256_xxx;
+ } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
+ if (!req128_horz_wc)
+ dcc_control = dcc_control__256_256_xxx;
+ else if (segment_order_horz == segment_order__contiguous)
+ dcc_control = dcc_control__128_128_xxx;
+ else
+ dcc_control = dcc_control__256_64_64;
+ } else if (input->scan == SCAN_DIRECTION_VERTICAL) {
+ if (!req128_vert_wc)
+ dcc_control = dcc_control__256_256_xxx;
+ else if (segment_order_vert == segment_order__contiguous)
+ dcc_control = dcc_control__128_128_xxx;
+ else
+ dcc_control = dcc_control__256_64_64;
+ } else {
+ if ((req128_horz_wc &&
+ segment_order_horz == segment_order__non_contiguous) ||
+ (req128_vert_wc &&
+ segment_order_vert == segment_order__non_contiguous))
+ /* access_dir not known, must use most constraining */
+ dcc_control = dcc_control__256_64_64;
+ else
+ /* reg128 is true for either horz and vert
+ * but segment_order is contiguous
+ */
+ dcc_control = dcc_control__128_128_xxx;
+ }
+
+ if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE &&
+ dcc_control != dcc_control__256_256_xxx)
+ return false;
+
+ switch (dcc_control) {
+ case dcc_control__256_256_xxx:
+ output->grph.rgb.max_uncompressed_blk_size = 256;
+ output->grph.rgb.max_compressed_blk_size = 256;
+ output->grph.rgb.independent_64b_blks = false;
+ break;
+ case dcc_control__128_128_xxx:
+ output->grph.rgb.max_uncompressed_blk_size = 128;
+ output->grph.rgb.max_compressed_blk_size = 128;
+ output->grph.rgb.independent_64b_blks = false;
+ break;
+ case dcc_control__256_64_64:
+ output->grph.rgb.max_uncompressed_blk_size = 256;
+ output->grph.rgb.max_compressed_blk_size = 64;
+ output->grph.rgb.independent_64b_blks = true;
+ break;
+ }
+
+ output->capable = true;
+ output->const_color_support = false;
+
+ return true;
+}
+
static const struct hubbub_funcs hubbub1_funcs = {
- .update_dchub = hubbub1_update_dchub
+ .update_dchub = hubbub1_update_dchub,
+ .dcc_support_swizzle = hubbub1_dcc_support_swizzle,
+ .dcc_support_pixel_format = hubbub1_dcc_support_pixel_format,
+ .get_dcc_compression_cap = hubbub1_get_dcc_compression_cap,
};
void hubbub1_construct(struct hubbub *hubbub,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
index a16e908821a0..6315a0e6b0d6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
@@ -27,6 +27,7 @@
#define __DC_HUBBUB_DCN10_H__
#include "core_types.h"
+#include "dchubbub.h"
#define HUBHUB_REG_LIST_DCN()\
SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A),\
@@ -47,7 +48,8 @@
SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND),\
SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
SR(DCHUBBUB_TEST_DEBUG_INDEX), \
- SR(DCHUBBUB_TEST_DEBUG_DATA)
+ SR(DCHUBBUB_TEST_DEBUG_DATA),\
+ SR(DCHUBBUB_SOFT_RESET)
#define HUBBUB_SR_WATERMARK_REG_LIST()\
SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A),\
@@ -104,6 +106,7 @@ struct dcn_hubbub_registers {
uint32_t DCHUBBUB_SDPIF_AGP_BOT;
uint32_t DCHUBBUB_SDPIF_AGP_TOP;
uint32_t DCHUBBUB_CRC_CTRL;
+ uint32_t DCHUBBUB_SOFT_RESET;
};
/* set field name */
@@ -113,6 +116,7 @@ struct dcn_hubbub_registers {
#define HUBBUB_MASK_SH_LIST_DCN(mask_sh)\
HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_SOFT_RESET, DCHUBBUB_GLOBAL_SOFT_RESET, mask_sh), \
HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, mask_sh), \
HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_DONE_INTERRUPT_DISABLE, mask_sh), \
HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_VALUE, mask_sh), \
@@ -142,6 +146,7 @@ struct dcn_hubbub_registers {
type DCHUBBUB_ARB_SAT_LEVEL;\
type DCHUBBUB_ARB_MIN_REQ_OUTSTAND;\
type DCHUBBUB_GLOBAL_TIMER_REFDIV;\
+ type DCHUBBUB_GLOBAL_SOFT_RESET; \
type SDPIF_FB_TOP;\
type SDPIF_FB_BASE;\
type SDPIF_FB_OFFSET;\
@@ -173,12 +178,6 @@ struct dcn_hubbub_wm {
struct dcn_hubbub_wm_set sets[4];
};
-struct hubbub_funcs {
- void (*update_dchub)(
- struct hubbub *hubbub,
- struct dchub_init_data *dh_data);
-};
-
struct hubbub {
const struct hubbub_funcs *funcs;
struct dc_context *ctx;
@@ -206,6 +205,7 @@ void hubbub1_toggle_watermark_change_req(
void hubbub1_wm_read_state(struct hubbub *hubbub,
struct dcn_hubbub_wm *wm);
+void hubbub1_soft_reset(struct hubbub *hubbub, bool reset);
void hubbub1_construct(struct hubbub *hubbub,
struct dc_context *ctx,
const struct dcn_hubbub_registers *hubbub_regs,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index 39b72f696ae9..d2ab78b35a7a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -78,6 +78,27 @@ static void hubp1_disconnect(struct hubp *hubp)
CURSOR_ENABLE, 0);
}
+static void hubp1_disable_control(struct hubp *hubp, bool disable_hubp)
+{
+ struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
+ uint32_t disable = disable_hubp ? 1 : 0;
+
+ REG_UPDATE(DCHUBP_CNTL,
+ HUBP_DISABLE, disable);
+}
+
+static unsigned int hubp1_get_underflow_status(struct hubp *hubp)
+{
+ uint32_t hubp_underflow = 0;
+ struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
+
+ REG_GET(DCHUBP_CNTL,
+ HUBP_UNDERFLOW_STATUS,
+ &hubp_underflow);
+
+ return hubp_underflow;
+}
+
static void hubp1_set_hubp_blank_en(struct hubp *hubp, bool blank)
{
struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
@@ -146,6 +167,9 @@ void hubp1_program_size_and_rotation(
* 444 or 420 luma
*/
if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) {
+ ASSERT(plane_size->video.chroma_pitch != 0);
+ /* Chroma pitch zero can cause system hang! */
+
pitch = plane_size->video.luma_pitch - 1;
meta_pitch = dcc->video.meta_pitch_l - 1;
pitch_c = plane_size->video.chroma_pitch - 1;
@@ -535,11 +559,13 @@ void hubp1_program_deadline(
REG_SET(VBLANK_PARAMETERS_3, 0,
REFCYC_PER_META_CHUNK_VBLANK_L, dlg_attr->refcyc_per_meta_chunk_vblank_l);
- REG_SET(NOM_PARAMETERS_0, 0,
- DST_Y_PER_PTE_ROW_NOM_L, dlg_attr->dst_y_per_pte_row_nom_l);
+ if (REG(NOM_PARAMETERS_0))
+ REG_SET(NOM_PARAMETERS_0, 0,
+ DST_Y_PER_PTE_ROW_NOM_L, dlg_attr->dst_y_per_pte_row_nom_l);
- REG_SET(NOM_PARAMETERS_1, 0,
- REFCYC_PER_PTE_GROUP_NOM_L, dlg_attr->refcyc_per_pte_group_nom_l);
+ if (REG(NOM_PARAMETERS_1))
+ REG_SET(NOM_PARAMETERS_1, 0,
+ REFCYC_PER_PTE_GROUP_NOM_L, dlg_attr->refcyc_per_pte_group_nom_l);
REG_SET(NOM_PARAMETERS_4, 0,
DST_Y_PER_META_ROW_NOM_L, dlg_attr->dst_y_per_meta_row_nom_l);
@@ -568,11 +594,13 @@ void hubp1_program_deadline(
REG_SET(VBLANK_PARAMETERS_4, 0,
REFCYC_PER_META_CHUNK_VBLANK_C, dlg_attr->refcyc_per_meta_chunk_vblank_c);
- REG_SET(NOM_PARAMETERS_2, 0,
- DST_Y_PER_PTE_ROW_NOM_C, dlg_attr->dst_y_per_pte_row_nom_c);
+ if (REG(NOM_PARAMETERS_2))
+ REG_SET(NOM_PARAMETERS_2, 0,
+ DST_Y_PER_PTE_ROW_NOM_C, dlg_attr->dst_y_per_pte_row_nom_c);
- REG_SET(NOM_PARAMETERS_3, 0,
- REFCYC_PER_PTE_GROUP_NOM_C, dlg_attr->refcyc_per_pte_group_nom_c);
+ if (REG(NOM_PARAMETERS_3))
+ REG_SET(NOM_PARAMETERS_3, 0,
+ REFCYC_PER_PTE_GROUP_NOM_C, dlg_attr->refcyc_per_pte_group_nom_c);
REG_SET(NOM_PARAMETERS_6, 0,
DST_Y_PER_META_ROW_NOM_C, dlg_attr->dst_y_per_meta_row_nom_c);
@@ -609,6 +637,13 @@ void hubp1_program_deadline(
REG_SET(DCN_SURF1_TTU_CNTL1, 0,
REFCYC_PER_REQ_DELIVERY_PRE,
ttu_attr->refcyc_per_req_delivery_pre_c);
+
+ REG_SET_3(DCN_CUR0_TTU_CNTL0, 0,
+ REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_cur0,
+ QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_cur0,
+ QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_cur0);
+ REG_SET(DCN_CUR0_TTU_CNTL1, 0,
+ REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_cur0);
}
static void hubp1_setup(
@@ -752,9 +787,159 @@ void min_set_viewport(
PRI_VIEWPORT_Y_START_C, viewport_c->y);
}
-void hubp1_read_state(struct dcn10_hubp *hubp1,
- struct dcn_hubp_state *s)
+void hubp1_read_state(struct hubp *hubp)
{
+ struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
+ struct dcn_hubp_state *s = &hubp1->state;
+ struct _vcs_dpi_display_dlg_regs_st *dlg_attr = &s->dlg_attr;
+ struct _vcs_dpi_display_ttu_regs_st *ttu_attr = &s->ttu_attr;
+ struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs;
+
+ /* Requester */
+ REG_GET(HUBPRET_CONTROL,
+ DET_BUF_PLANE1_BASE_ADDRESS, &rq_regs->plane1_base_address);
+ REG_GET_4(DCN_EXPANSION_MODE,
+ DRQ_EXPANSION_MODE, &rq_regs->drq_expansion_mode,
+ PRQ_EXPANSION_MODE, &rq_regs->prq_expansion_mode,
+ MRQ_EXPANSION_MODE, &rq_regs->mrq_expansion_mode,
+ CRQ_EXPANSION_MODE, &rq_regs->crq_expansion_mode);
+ REG_GET_8(DCHUBP_REQ_SIZE_CONFIG,
+ CHUNK_SIZE, &rq_regs->rq_regs_l.chunk_size,
+ MIN_CHUNK_SIZE, &rq_regs->rq_regs_l.min_chunk_size,
+ META_CHUNK_SIZE, &rq_regs->rq_regs_l.meta_chunk_size,
+ MIN_META_CHUNK_SIZE, &rq_regs->rq_regs_l.min_meta_chunk_size,
+ DPTE_GROUP_SIZE, &rq_regs->rq_regs_l.dpte_group_size,
+ MPTE_GROUP_SIZE, &rq_regs->rq_regs_l.mpte_group_size,
+ SWATH_HEIGHT, &rq_regs->rq_regs_l.swath_height,
+ PTE_ROW_HEIGHT_LINEAR, &rq_regs->rq_regs_l.pte_row_height_linear);
+ REG_GET_8(DCHUBP_REQ_SIZE_CONFIG_C,
+ CHUNK_SIZE_C, &rq_regs->rq_regs_c.chunk_size,
+ MIN_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_chunk_size,
+ META_CHUNK_SIZE_C, &rq_regs->rq_regs_c.meta_chunk_size,
+ MIN_META_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_meta_chunk_size,
+ DPTE_GROUP_SIZE_C, &rq_regs->rq_regs_c.dpte_group_size,
+ MPTE_GROUP_SIZE_C, &rq_regs->rq_regs_c.mpte_group_size,
+ SWATH_HEIGHT_C, &rq_regs->rq_regs_c.swath_height,
+ PTE_ROW_HEIGHT_LINEAR_C, &rq_regs->rq_regs_c.pte_row_height_linear);
+
+ /* DLG - Per hubp */
+ REG_GET_2(BLANK_OFFSET_0,
+ REFCYC_H_BLANK_END, &dlg_attr->refcyc_h_blank_end,
+ DLG_V_BLANK_END, &dlg_attr->dlg_vblank_end);
+
+ REG_GET(BLANK_OFFSET_1,
+ MIN_DST_Y_NEXT_START, &dlg_attr->min_dst_y_next_start);
+
+ REG_GET(DST_DIMENSIONS,
+ REFCYC_PER_HTOTAL, &dlg_attr->refcyc_per_htotal);
+
+ REG_GET_2(DST_AFTER_SCALER,
+ REFCYC_X_AFTER_SCALER, &dlg_attr->refcyc_x_after_scaler,
+ DST_Y_AFTER_SCALER, &dlg_attr->dst_y_after_scaler);
+
+ if (REG(PREFETCH_SETTINS))
+ REG_GET_2(PREFETCH_SETTINS,
+ DST_Y_PREFETCH, &dlg_attr->dst_y_prefetch,
+ VRATIO_PREFETCH, &dlg_attr->vratio_prefetch);
+ else
+ REG_GET_2(PREFETCH_SETTINGS,
+ DST_Y_PREFETCH, &dlg_attr->dst_y_prefetch,
+ VRATIO_PREFETCH, &dlg_attr->vratio_prefetch);
+
+ REG_GET_2(VBLANK_PARAMETERS_0,
+ DST_Y_PER_VM_VBLANK, &dlg_attr->dst_y_per_vm_vblank,
+ DST_Y_PER_ROW_VBLANK, &dlg_attr->dst_y_per_row_vblank);
+
+ REG_GET(REF_FREQ_TO_PIX_FREQ,
+ REF_FREQ_TO_PIX_FREQ, &dlg_attr->ref_freq_to_pix_freq);
+
+ /* DLG - Per luma/chroma */
+ REG_GET(VBLANK_PARAMETERS_1,
+ REFCYC_PER_PTE_GROUP_VBLANK_L, &dlg_attr->refcyc_per_pte_group_vblank_l);
+
+ REG_GET(VBLANK_PARAMETERS_3,
+ REFCYC_PER_META_CHUNK_VBLANK_L, &dlg_attr->refcyc_per_meta_chunk_vblank_l);
+
+ if (REG(NOM_PARAMETERS_0))
+ REG_GET(NOM_PARAMETERS_0,
+ DST_Y_PER_PTE_ROW_NOM_L, &dlg_attr->dst_y_per_pte_row_nom_l);
+
+ if (REG(NOM_PARAMETERS_1))
+ REG_GET(NOM_PARAMETERS_1,
+ REFCYC_PER_PTE_GROUP_NOM_L, &dlg_attr->refcyc_per_pte_group_nom_l);
+
+ REG_GET(NOM_PARAMETERS_4,
+ DST_Y_PER_META_ROW_NOM_L, &dlg_attr->dst_y_per_meta_row_nom_l);
+
+ REG_GET(NOM_PARAMETERS_5,
+ REFCYC_PER_META_CHUNK_NOM_L, &dlg_attr->refcyc_per_meta_chunk_nom_l);
+
+ REG_GET_2(PER_LINE_DELIVERY_PRE,
+ REFCYC_PER_LINE_DELIVERY_PRE_L, &dlg_attr->refcyc_per_line_delivery_pre_l,
+ REFCYC_PER_LINE_DELIVERY_PRE_C, &dlg_attr->refcyc_per_line_delivery_pre_c);
+
+ REG_GET_2(PER_LINE_DELIVERY,
+ REFCYC_PER_LINE_DELIVERY_L, &dlg_attr->refcyc_per_line_delivery_l,
+ REFCYC_PER_LINE_DELIVERY_C, &dlg_attr->refcyc_per_line_delivery_c);
+
+ if (REG(PREFETCH_SETTINS_C))
+ REG_GET(PREFETCH_SETTINS_C,
+ VRATIO_PREFETCH_C, &dlg_attr->vratio_prefetch_c);
+ else
+ REG_GET(PREFETCH_SETTINGS_C,
+ VRATIO_PREFETCH_C, &dlg_attr->vratio_prefetch_c);
+
+ REG_GET(VBLANK_PARAMETERS_2,
+ REFCYC_PER_PTE_GROUP_VBLANK_C, &dlg_attr->refcyc_per_pte_group_vblank_c);
+
+ REG_GET(VBLANK_PARAMETERS_4,
+ REFCYC_PER_META_CHUNK_VBLANK_C, &dlg_attr->refcyc_per_meta_chunk_vblank_c);
+
+ if (REG(NOM_PARAMETERS_2))
+ REG_GET(NOM_PARAMETERS_2,
+ DST_Y_PER_PTE_ROW_NOM_C, &dlg_attr->dst_y_per_pte_row_nom_c);
+
+ if (REG(NOM_PARAMETERS_3))
+ REG_GET(NOM_PARAMETERS_3,
+ REFCYC_PER_PTE_GROUP_NOM_C, &dlg_attr->refcyc_per_pte_group_nom_c);
+
+ REG_GET(NOM_PARAMETERS_6,
+ DST_Y_PER_META_ROW_NOM_C, &dlg_attr->dst_y_per_meta_row_nom_c);
+
+ REG_GET(NOM_PARAMETERS_7,
+ REFCYC_PER_META_CHUNK_NOM_C, &dlg_attr->refcyc_per_meta_chunk_nom_c);
+
+ /* TTU - per hubp */
+ REG_GET_2(DCN_TTU_QOS_WM,
+ QoS_LEVEL_LOW_WM, &ttu_attr->qos_level_low_wm,
+ QoS_LEVEL_HIGH_WM, &ttu_attr->qos_level_high_wm);
+
+ REG_GET_2(DCN_GLOBAL_TTU_CNTL,
+ MIN_TTU_VBLANK, &ttu_attr->min_ttu_vblank,
+ QoS_LEVEL_FLIP, &ttu_attr->qos_level_flip);
+
+ /* TTU - per luma/chroma */
+ /* Assumed surf0 is luma and 1 is chroma */
+
+ REG_GET_3(DCN_SURF0_TTU_CNTL0,
+ REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_l,
+ QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_l,
+ QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_l);
+
+ REG_GET(DCN_SURF0_TTU_CNTL1,
+ REFCYC_PER_REQ_DELIVERY_PRE,
+ &ttu_attr->refcyc_per_req_delivery_pre_l);
+
+ REG_GET_3(DCN_SURF1_TTU_CNTL0,
+ REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_c,
+ QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_c,
+ QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_c);
+
+ REG_GET(DCN_SURF1_TTU_CNTL1,
+ REFCYC_PER_REQ_DELIVERY_PRE,
+ &ttu_attr->refcyc_per_req_delivery_pre_c);
+
+ /* Rest of hubp */
REG_GET(DCSURF_SURFACE_CONFIG,
SURFACE_PIXEL_FORMAT, &s->pixel_format);
@@ -890,14 +1075,14 @@ void hubp1_cursor_set_position(
ASSERT(param->h_scale_ratio.value);
if (param->h_scale_ratio.value)
- dst_x_offset = dal_fixed31_32_floor(dal_fixed31_32_div(
- dal_fixed31_32_from_int(dst_x_offset),
+ dst_x_offset = dc_fixpt_floor(dc_fixpt_div(
+ dc_fixpt_from_int(dst_x_offset),
param->h_scale_ratio));
if (src_x_offset >= (int)param->viewport_width)
cur_en = 0; /* not visible beyond right edge*/
- if (src_x_offset + (int)hubp->curs_attr.width < 0)
+ if (src_x_offset + (int)hubp->curs_attr.width <= 0)
cur_en = 0; /* not visible beyond left edge*/
if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0)
@@ -952,6 +1137,10 @@ static struct hubp_funcs dcn10_hubp_funcs = {
.hubp_disconnect = hubp1_disconnect,
.hubp_clk_cntl = hubp1_clk_cntl,
.hubp_vtg_sel = hubp1_vtg_sel,
+ .hubp_read_state = hubp1_read_state,
+ .hubp_disable_control = hubp1_disable_control,
+ .hubp_get_underflow_status = hubp1_get_underflow_status,
+
};
/*****************************************/
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
index 4a3703e12ea1..af384034398f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
@@ -30,6 +30,7 @@
#define TO_DCN10_HUBP(hubp)\
container_of(hubp, struct dcn10_hubp, base)
+/* Register address initialization macro for all ASICs (including those with reduced functionality) */
#define HUBP_REG_LIST_DCN(id)\
SRI(DCHUBP_CNTL, HUBP, id),\
SRI(HUBPREQ_DEBUG_DB, HUBP, id),\
@@ -78,16 +79,12 @@
SRI(REF_FREQ_TO_PIX_FREQ, HUBPREQ, id),\
SRI(VBLANK_PARAMETERS_1, HUBPREQ, id),\
SRI(VBLANK_PARAMETERS_3, HUBPREQ, id),\
- SRI(NOM_PARAMETERS_0, HUBPREQ, id),\
- SRI(NOM_PARAMETERS_1, HUBPREQ, id),\
SRI(NOM_PARAMETERS_4, HUBPREQ, id),\
SRI(NOM_PARAMETERS_5, HUBPREQ, id),\
SRI(PER_LINE_DELIVERY_PRE, HUBPREQ, id),\
SRI(PER_LINE_DELIVERY, HUBPREQ, id),\
SRI(VBLANK_PARAMETERS_2, HUBPREQ, id),\
SRI(VBLANK_PARAMETERS_4, HUBPREQ, id),\
- SRI(NOM_PARAMETERS_2, HUBPREQ, id),\
- SRI(NOM_PARAMETERS_3, HUBPREQ, id),\
SRI(NOM_PARAMETERS_6, HUBPREQ, id),\
SRI(NOM_PARAMETERS_7, HUBPREQ, id),\
SRI(DCN_TTU_QOS_WM, HUBPREQ, id),\
@@ -96,11 +93,21 @@
SRI(DCN_SURF0_TTU_CNTL1, HUBPREQ, id),\
SRI(DCN_SURF1_TTU_CNTL0, HUBPREQ, id),\
SRI(DCN_SURF1_TTU_CNTL1, HUBPREQ, id),\
- SRI(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id),\
+ SRI(DCN_CUR0_TTU_CNTL0, HUBPREQ, id),\
+ SRI(DCN_CUR0_TTU_CNTL1, HUBPREQ, id),\
SRI(HUBP_CLK_CNTL, HUBP, id)
+/* Register address initialization macro for ASICs with VM */
+#define HUBP_REG_LIST_DCN_VM(id)\
+ SRI(NOM_PARAMETERS_0, HUBPREQ, id),\
+ SRI(NOM_PARAMETERS_1, HUBPREQ, id),\
+ SRI(NOM_PARAMETERS_2, HUBPREQ, id),\
+ SRI(NOM_PARAMETERS_3, HUBPREQ, id),\
+ SRI(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id)
+
#define HUBP_REG_LIST_DCN10(id)\
HUBP_REG_LIST_DCN(id),\
+ HUBP_REG_LIST_DCN_VM(id),\
SRI(PREFETCH_SETTINS, HUBPREQ, id),\
SRI(PREFETCH_SETTINS_C, HUBPREQ, id),\
SRI(DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB, HUBPREQ, id),\
@@ -198,6 +205,8 @@
uint32_t DCN_SURF0_TTU_CNTL1; \
uint32_t DCN_SURF1_TTU_CNTL0; \
uint32_t DCN_SURF1_TTU_CNTL1; \
+ uint32_t DCN_CUR0_TTU_CNTL0; \
+ uint32_t DCN_CUR0_TTU_CNTL1; \
uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB; \
uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LSB; \
uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_MSB; \
@@ -237,12 +246,14 @@
#define HUBP_SF(reg_name, field_name, post_fix)\
.field_name = reg_name ## __ ## field_name ## post_fix
+/* Mask/shift struct generation macro for all ASICs (including those with reduced functionality) */
#define HUBP_MASK_SH_LIST_DCN(mask_sh)\
HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_BLANK_EN, mask_sh),\
HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_TTU_DISABLE, mask_sh),\
HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNDERFLOW_STATUS, mask_sh),\
HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_NO_OUTSTANDING_REQ, mask_sh),\
HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VTG_SEL, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_DISABLE, mask_sh),\
HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_PIPES, mask_sh),\
HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_BANKS, mask_sh),\
HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, PIPE_INTERLEAVE, mask_sh),\
@@ -335,8 +346,6 @@
HUBP_SF(HUBPREQ0_REF_FREQ_TO_PIX_FREQ, REF_FREQ_TO_PIX_FREQ, mask_sh),\
HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_1, REFCYC_PER_PTE_GROUP_VBLANK_L, mask_sh),\
HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_3, REFCYC_PER_META_CHUNK_VBLANK_L, mask_sh),\
- HUBP_SF(HUBPREQ0_NOM_PARAMETERS_0, DST_Y_PER_PTE_ROW_NOM_L, mask_sh),\
- HUBP_SF(HUBPREQ0_NOM_PARAMETERS_1, REFCYC_PER_PTE_GROUP_NOM_L, mask_sh),\
HUBP_SF(HUBPREQ0_NOM_PARAMETERS_4, DST_Y_PER_META_ROW_NOM_L, mask_sh),\
HUBP_SF(HUBPREQ0_NOM_PARAMETERS_5, REFCYC_PER_META_CHUNK_NOM_L, mask_sh),\
HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY_PRE, REFCYC_PER_LINE_DELIVERY_PRE_L, mask_sh),\
@@ -345,8 +354,6 @@
HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY, REFCYC_PER_LINE_DELIVERY_C, mask_sh),\
HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_2, REFCYC_PER_PTE_GROUP_VBLANK_C, mask_sh),\
HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_4, REFCYC_PER_META_CHUNK_VBLANK_C, mask_sh),\
- HUBP_SF(HUBPREQ0_NOM_PARAMETERS_2, DST_Y_PER_PTE_ROW_NOM_C, mask_sh),\
- HUBP_SF(HUBPREQ0_NOM_PARAMETERS_3, REFCYC_PER_PTE_GROUP_NOM_C, mask_sh),\
HUBP_SF(HUBPREQ0_NOM_PARAMETERS_6, DST_Y_PER_META_ROW_NOM_C, mask_sh),\
HUBP_SF(HUBPREQ0_NOM_PARAMETERS_7, REFCYC_PER_META_CHUNK_NOM_C, mask_sh),\
HUBP_SF(HUBPREQ0_DCN_TTU_QOS_WM, QoS_LEVEL_LOW_WM, mask_sh),\
@@ -357,12 +364,24 @@
HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_LEVEL_FIXED, mask_sh),\
HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_RAMP_DISABLE, mask_sh),\
HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh),\
+ HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh)
+
+/* Mask/shift struct generation macro for ASICs with VM */
+#define HUBP_MASK_SH_LIST_DCN_VM(mask_sh)\
+ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_0, DST_Y_PER_PTE_ROW_NOM_L, mask_sh),\
+ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_1, REFCYC_PER_PTE_GROUP_NOM_L, mask_sh),\
+ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_2, DST_Y_PER_PTE_ROW_NOM_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_3, REFCYC_PER_PTE_GROUP_NOM_C, mask_sh),\
HUBP_SF(HUBPREQ0_DCN_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, mask_sh),\
HUBP_SF(HUBPREQ0_DCN_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, mask_sh),\
- HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh)
+ HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, REFCYC_PER_REQ_DELIVERY, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, QoS_LEVEL_FIXED, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, QoS_RAMP_DISABLE, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh)
#define HUBP_MASK_SH_LIST_DCN10(mask_sh)\
HUBP_MASK_SH_LIST_DCN(mask_sh),\
+ HUBP_MASK_SH_LIST_DCN_VM(mask_sh),\
HUBP_SF(HUBPREQ0_PREFETCH_SETTINS, DST_Y_PREFETCH, mask_sh),\
HUBP_SF(HUBPREQ0_PREFETCH_SETTINS, VRATIO_PREFETCH, mask_sh),\
HUBP_SF(HUBPREQ0_PREFETCH_SETTINS_C, VRATIO_PREFETCH_C, mask_sh),\
@@ -403,6 +422,7 @@
#define DCN_HUBP_REG_FIELD_LIST(type) \
type HUBP_BLANK_EN;\
+ type HUBP_DISABLE;\
type HUBP_TTU_DISABLE;\
type HUBP_NO_OUTSTANDING_REQ;\
type HUBP_VTG_SEL;\
@@ -601,8 +621,29 @@ struct dcn_mi_mask {
DCN_HUBP_REG_FIELD_LIST(uint32_t);
};
+struct dcn_hubp_state {
+ struct _vcs_dpi_display_dlg_regs_st dlg_attr;
+ struct _vcs_dpi_display_ttu_regs_st ttu_attr;
+ struct _vcs_dpi_display_rq_regs_st rq_regs;
+ uint32_t pixel_format;
+ uint32_t inuse_addr_hi;
+ uint32_t viewport_width;
+ uint32_t viewport_height;
+ uint32_t rotation_angle;
+ uint32_t h_mirror_en;
+ uint32_t sw_mode;
+ uint32_t dcc_en;
+ uint32_t blank_en;
+ uint32_t underflow_status;
+ uint32_t ttu_disable;
+ uint32_t min_ttu_vblank;
+ uint32_t qos_level_low_wm;
+ uint32_t qos_level_high_wm;
+};
+
struct dcn10_hubp {
struct hubp base;
+ struct dcn_hubp_state state;
const struct dcn_mi_registers *hubp_regs;
const struct dcn_mi_shift *hubp_shift;
const struct dcn_mi_mask *hubp_mask;
@@ -680,26 +721,9 @@ void dcn10_hubp_construct(
const struct dcn_mi_shift *hubp_shift,
const struct dcn_mi_mask *hubp_mask);
-
-struct dcn_hubp_state {
- uint32_t pixel_format;
- uint32_t inuse_addr_hi;
- uint32_t viewport_width;
- uint32_t viewport_height;
- uint32_t rotation_angle;
- uint32_t h_mirror_en;
- uint32_t sw_mode;
- uint32_t dcc_en;
- uint32_t blank_en;
- uint32_t underflow_status;
- uint32_t ttu_disable;
- uint32_t min_ttu_vblank;
- uint32_t qos_level_low_wm;
- uint32_t qos_level_high_wm;
-};
-void hubp1_read_state(struct dcn10_hubp *hubp1,
- struct dcn_hubp_state *s);
+void hubp1_read_state(struct hubp *hubp);
enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 8b0f6b8a5627..f8e0576af6e0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -45,8 +45,8 @@
#include "dcn10_hubbub.h"
#include "dcn10_cm_common.h"
-#define DC_LOGGER \
- ctx->logger
+#define DC_LOGGER_INIT(logger)
+
#define CTX \
hws->ctx
#define REG(reg)\
@@ -56,16 +56,17 @@
#define FN(reg_name, field_name) \
hws->shifts->field_name, hws->masks->field_name
+/*print is 17 wide, first two characters are spaces*/
#define DTN_INFO_MICRO_SEC(ref_cycle) \
print_microsec(dc_ctx, ref_cycle)
void print_microsec(struct dc_context *dc_ctx, uint32_t ref_cycle)
{
- static const uint32_t ref_clk_mhz = 48;
- static const unsigned int frac = 10;
+ const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clock_inKhz / 1000;
+ static const unsigned int frac = 1000;
uint32_t us_x10 = (ref_cycle * frac) / ref_clk_mhz;
- DTN_INFO("%d.%d \t ",
+ DTN_INFO(" %11d.%03d",
us_x10 / frac,
us_x10 % frac);
}
@@ -92,14 +93,14 @@ void dcn10_log_hubbub_state(struct dc *dc)
hubbub1_wm_read_state(dc->res_pool->hubbub, &wm);
- DTN_INFO("HUBBUB WM: \t data_urgent \t pte_meta_urgent \t "
- "sr_enter \t sr_exit \t dram_clk_change \n");
+ DTN_INFO("HUBBUB WM: data_urgent pte_meta_urgent"
+ " sr_enter sr_exit dram_clk_change\n");
for (i = 0; i < 4; i++) {
struct dcn_hubbub_wm_set *s;
s = &wm.sets[i];
- DTN_INFO("WM_Set[%d]:\t ", s->wm_set);
+ DTN_INFO("WM_Set[%d]:", s->wm_set);
DTN_INFO_MICRO_SEC(s->data_urgent);
DTN_INFO_MICRO_SEC(s->pte_meta_urgent);
DTN_INFO_MICRO_SEC(s->sr_enter);
@@ -111,6 +112,121 @@ void dcn10_log_hubbub_state(struct dc *dc)
DTN_INFO("\n");
}
+static void dcn10_log_hubp_states(struct dc *dc)
+{
+ struct dc_context *dc_ctx = dc->ctx;
+ struct resource_pool *pool = dc->res_pool;
+ int i;
+
+ DTN_INFO("HUBP: format addr_hi width height"
+ " rot mir sw_mode dcc_en blank_en ttu_dis underflow"
+ " min_ttu_vblank qos_low_wm qos_high_wm\n");
+ for (i = 0; i < pool->pipe_count; i++) {
+ struct hubp *hubp = pool->hubps[i];
+ struct dcn_hubp_state *s = &(TO_DCN10_HUBP(hubp)->state);
+
+ hubp->funcs->hubp_read_state(hubp);
+
+ if (!s->blank_en) {
+ DTN_INFO("[%2d]: %5xh %6xh %5d %6d %2xh %2xh %6xh"
+ " %6d %8d %7d %8xh",
+ hubp->inst,
+ s->pixel_format,
+ s->inuse_addr_hi,
+ s->viewport_width,
+ s->viewport_height,
+ s->rotation_angle,
+ s->h_mirror_en,
+ s->sw_mode,
+ s->dcc_en,
+ s->blank_en,
+ s->ttu_disable,
+ s->underflow_status);
+ DTN_INFO_MICRO_SEC(s->min_ttu_vblank);
+ DTN_INFO_MICRO_SEC(s->qos_level_low_wm);
+ DTN_INFO_MICRO_SEC(s->qos_level_high_wm);
+ DTN_INFO("\n");
+ }
+ }
+
+ DTN_INFO("\n=========RQ========\n");
+ DTN_INFO("HUBP: drq_exp_m prq_exp_m mrq_exp_m crq_exp_m plane1_ba L:chunk_s min_chu_s meta_ch_s"
+ " min_m_c_s dpte_gr_s mpte_gr_s swath_hei pte_row_h C:chunk_s min_chu_s meta_ch_s"
+ " min_m_c_s dpte_gr_s mpte_gr_s swath_hei pte_row_h\n");
+ for (i = 0; i < pool->pipe_count; i++) {
+ struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state);
+ struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs;
+
+ if (!s->blank_en)
+ DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n",
+ pool->hubps[i]->inst, rq_regs->drq_expansion_mode, rq_regs->prq_expansion_mode, rq_regs->mrq_expansion_mode,
+ rq_regs->crq_expansion_mode, rq_regs->plane1_base_address, rq_regs->rq_regs_l.chunk_size,
+ rq_regs->rq_regs_l.min_chunk_size, rq_regs->rq_regs_l.meta_chunk_size,
+ rq_regs->rq_regs_l.min_meta_chunk_size, rq_regs->rq_regs_l.dpte_group_size,
+ rq_regs->rq_regs_l.mpte_group_size, rq_regs->rq_regs_l.swath_height,
+ rq_regs->rq_regs_l.pte_row_height_linear, rq_regs->rq_regs_c.chunk_size, rq_regs->rq_regs_c.min_chunk_size,
+ rq_regs->rq_regs_c.meta_chunk_size, rq_regs->rq_regs_c.min_meta_chunk_size,
+ rq_regs->rq_regs_c.dpte_group_size, rq_regs->rq_regs_c.mpte_group_size,
+ rq_regs->rq_regs_c.swath_height, rq_regs->rq_regs_c.pte_row_height_linear);
+ }
+
+ DTN_INFO("========DLG========\n");
+ DTN_INFO("HUBP: rc_hbe dlg_vbe min_d_y_n rc_per_ht rc_x_a_s "
+ " dst_y_a_s dst_y_pf dst_y_vvb dst_y_rvb dst_y_vfl dst_y_rfl rf_pix_fq"
+ " vratio_pf vrat_pf_c rc_pg_vbl rc_pg_vbc rc_mc_vbl rc_mc_vbc rc_pg_fll"
+ " rc_pg_flc rc_mc_fll rc_mc_flc pr_nom_l pr_nom_c rc_pg_nl rc_pg_nc "
+ " mr_nom_l mr_nom_c rc_mc_nl rc_mc_nc rc_ld_pl rc_ld_pc rc_ld_l "
+ " rc_ld_c cha_cur0 ofst_cur1 cha_cur1 vr_af_vc0 ddrq_limt x_rt_dlay"
+ " x_rp_dlay x_rr_sfl\n");
+ for (i = 0; i < pool->pipe_count; i++) {
+ struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state);
+ struct _vcs_dpi_display_dlg_regs_st *dlg_regs = &s->dlg_attr;
+
+ if (!s->blank_en)
+ DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh"
+ "% 8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh"
+ " %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n",
+ pool->hubps[i]->inst, dlg_regs->refcyc_h_blank_end, dlg_regs->dlg_vblank_end, dlg_regs->min_dst_y_next_start,
+ dlg_regs->refcyc_per_htotal, dlg_regs->refcyc_x_after_scaler, dlg_regs->dst_y_after_scaler,
+ dlg_regs->dst_y_prefetch, dlg_regs->dst_y_per_vm_vblank, dlg_regs->dst_y_per_row_vblank,
+ dlg_regs->dst_y_per_vm_flip, dlg_regs->dst_y_per_row_flip, dlg_regs->ref_freq_to_pix_freq,
+ dlg_regs->vratio_prefetch, dlg_regs->vratio_prefetch_c, dlg_regs->refcyc_per_pte_group_vblank_l,
+ dlg_regs->refcyc_per_pte_group_vblank_c, dlg_regs->refcyc_per_meta_chunk_vblank_l,
+ dlg_regs->refcyc_per_meta_chunk_vblank_c, dlg_regs->refcyc_per_pte_group_flip_l,
+ dlg_regs->refcyc_per_pte_group_flip_c, dlg_regs->refcyc_per_meta_chunk_flip_l,
+ dlg_regs->refcyc_per_meta_chunk_flip_c, dlg_regs->dst_y_per_pte_row_nom_l,
+ dlg_regs->dst_y_per_pte_row_nom_c, dlg_regs->refcyc_per_pte_group_nom_l,
+ dlg_regs->refcyc_per_pte_group_nom_c, dlg_regs->dst_y_per_meta_row_nom_l,
+ dlg_regs->dst_y_per_meta_row_nom_c, dlg_regs->refcyc_per_meta_chunk_nom_l,
+ dlg_regs->refcyc_per_meta_chunk_nom_c, dlg_regs->refcyc_per_line_delivery_pre_l,
+ dlg_regs->refcyc_per_line_delivery_pre_c, dlg_regs->refcyc_per_line_delivery_l,
+ dlg_regs->refcyc_per_line_delivery_c, dlg_regs->chunk_hdl_adjust_cur0, dlg_regs->dst_y_offset_cur1,
+ dlg_regs->chunk_hdl_adjust_cur1, dlg_regs->vready_after_vcount0, dlg_regs->dst_y_delta_drq_limit,
+ dlg_regs->xfc_reg_transfer_delay, dlg_regs->xfc_reg_precharge_delay,
+ dlg_regs->xfc_reg_remote_surface_flip_latency);
+ }
+
+ DTN_INFO("========TTU========\n");
+ DTN_INFO("HUBP: qos_ll_wm qos_lh_wm mn_ttu_vb qos_l_flp rc_rd_p_l rc_rd_l rc_rd_p_c"
+ " rc_rd_c rc_rd_c0 rc_rd_pc0 rc_rd_c1 rc_rd_pc1 qos_lf_l qos_rds_l"
+ " qos_lf_c qos_rds_c qos_lf_c0 qos_rds_c0 qos_lf_c1 qos_rds_c1\n");
+ for (i = 0; i < pool->pipe_count; i++) {
+ struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state);
+ struct _vcs_dpi_display_ttu_regs_st *ttu_regs = &s->ttu_attr;
+
+ if (!s->blank_en)
+ DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n",
+ pool->hubps[i]->inst, ttu_regs->qos_level_low_wm, ttu_regs->qos_level_high_wm, ttu_regs->min_ttu_vblank,
+ ttu_regs->qos_level_flip, ttu_regs->refcyc_per_req_delivery_pre_l, ttu_regs->refcyc_per_req_delivery_l,
+ ttu_regs->refcyc_per_req_delivery_pre_c, ttu_regs->refcyc_per_req_delivery_c, ttu_regs->refcyc_per_req_delivery_cur0,
+ ttu_regs->refcyc_per_req_delivery_pre_cur0, ttu_regs->refcyc_per_req_delivery_cur1,
+ ttu_regs->refcyc_per_req_delivery_pre_cur1, ttu_regs->qos_level_fixed_l, ttu_regs->qos_ramp_disable_l,
+ ttu_regs->qos_level_fixed_c, ttu_regs->qos_ramp_disable_c, ttu_regs->qos_level_fixed_cur0,
+ ttu_regs->qos_ramp_disable_cur0, ttu_regs->qos_level_fixed_cur1, ttu_regs->qos_ramp_disable_cur1);
+ }
+ DTN_INFO("\n");
+}
+
void dcn10_log_hw_state(struct dc *dc)
{
struct dc_context *dc_ctx = dc->ctx;
@@ -121,41 +237,64 @@ void dcn10_log_hw_state(struct dc *dc)
dcn10_log_hubbub_state(dc);
- DTN_INFO("HUBP:\t format \t addr_hi \t width \t height \t "
- "rotation \t mirror \t sw_mode \t "
- "dcc_en \t blank_en \t ttu_dis \t underflow \t "
- "min_ttu_vblank \t qos_low_wm \t qos_high_wm \n");
+ dcn10_log_hubp_states(dc);
+ DTN_INFO("DPP: IGAM format IGAM mode DGAM mode RGAM mode"
+ " GAMUT mode C11 C12 C13 C14 C21 C22 C23 C24 "
+ "C31 C32 C33 C34\n");
for (i = 0; i < pool->pipe_count; i++) {
- struct hubp *hubp = pool->hubps[i];
- struct dcn_hubp_state s;
-
- hubp1_read_state(TO_DCN10_HUBP(hubp), &s);
-
- DTN_INFO("[%d]:\t %xh \t %xh \t %d \t %d \t "
- "%xh \t %xh \t %xh \t "
- "%d \t %d \t %d \t %xh \t",
- hubp->inst,
- s.pixel_format,
- s.inuse_addr_hi,
- s.viewport_width,
- s.viewport_height,
- s.rotation_angle,
- s.h_mirror_en,
- s.sw_mode,
- s.dcc_en,
- s.blank_en,
- s.ttu_disable,
- s.underflow_status);
- DTN_INFO_MICRO_SEC(s.min_ttu_vblank);
- DTN_INFO_MICRO_SEC(s.qos_level_low_wm);
- DTN_INFO_MICRO_SEC(s.qos_level_high_wm);
+ struct dpp *dpp = pool->dpps[i];
+ struct dcn_dpp_state s;
+
+ dpp->funcs->dpp_read_state(dpp, &s);
+
+ DTN_INFO("[%2d]: %11xh %-11s %-11s %-11s"
+ "%8x %08xh %08xh %08xh %08xh %08xh %08xh",
+ dpp->inst,
+ s.igam_input_format,
+ (s.igam_lut_mode == 0) ? "BypassFixed" :
+ ((s.igam_lut_mode == 1) ? "BypassFloat" :
+ ((s.igam_lut_mode == 2) ? "RAM" :
+ ((s.igam_lut_mode == 3) ? "RAM" :
+ "Unknown"))),
+ (s.dgam_lut_mode == 0) ? "Bypass" :
+ ((s.dgam_lut_mode == 1) ? "sRGB" :
+ ((s.dgam_lut_mode == 2) ? "Ycc" :
+ ((s.dgam_lut_mode == 3) ? "RAM" :
+ ((s.dgam_lut_mode == 4) ? "RAM" :
+ "Unknown")))),
+ (s.rgam_lut_mode == 0) ? "Bypass" :
+ ((s.rgam_lut_mode == 1) ? "sRGB" :
+ ((s.rgam_lut_mode == 2) ? "Ycc" :
+ ((s.rgam_lut_mode == 3) ? "RAM" :
+ ((s.rgam_lut_mode == 4) ? "RAM" :
+ "Unknown")))),
+ s.gamut_remap_mode,
+ s.gamut_remap_c11_c12,
+ s.gamut_remap_c13_c14,
+ s.gamut_remap_c21_c22,
+ s.gamut_remap_c23_c24,
+ s.gamut_remap_c31_c32,
+ s.gamut_remap_c33_c34);
DTN_INFO("\n");
}
DTN_INFO("\n");
- DTN_INFO("OTG:\t v_bs \t v_be \t v_ss \t v_se \t vpol \t vmax \t vmin \t "
- "h_bs \t h_be \t h_ss \t h_se \t hpol \t htot \t vtot \t underflow\n");
+ DTN_INFO("MPCC: OPP DPP MPCCBOT MODE ALPHA_MODE PREMULT OVERLAP_ONLY IDLE\n");
+ for (i = 0; i < pool->pipe_count; i++) {
+ struct mpcc_state s = {0};
+
+ pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s);
+ if (s.opp_id != 0xf)
+ DTN_INFO("[%2d]: %2xh %2xh %6xh %4d %10d %7d %12d %4d\n",
+ i, s.opp_id, s.dpp_id, s.bot_mpcc_id,
+ s.mode, s.alpha_mode, s.pre_multiplied_alpha, s.overlap_only,
+ s.idle);
+ }
+ DTN_INFO("\n");
+
+ DTN_INFO("OTG: v_bs v_be v_ss v_se vpol vmax vmin vmax_sel vmin_sel"
+ " h_bs h_be h_ss h_se hpol htot vtot underflow\n");
for (i = 0; i < pool->timing_generator_count; i++) {
struct timing_generator *tg = pool->timing_generators[i];
@@ -167,9 +306,8 @@ void dcn10_log_hw_state(struct dc *dc)
if ((s.otg_enabled & 1) == 0)
continue;
- DTN_INFO("[%d]:\t %d \t %d \t %d \t %d \t "
- "%d \t %d \t %d \t %d \t %d \t %d \t "
- "%d \t %d \t %d \t %d \t %d \t ",
+ DTN_INFO("[%d]: %5d %5d %5d %5d %5d %5d %5d %9d %9d %5d %5d %5d"
+ " %5d %5d %5d %5d %9d\n",
tg->inst,
s.v_blank_start,
s.v_blank_end,
@@ -178,6 +316,8 @@ void dcn10_log_hw_state(struct dc *dc)
s.v_sync_a_pol,
s.v_total_max,
s.v_total_min,
+ s.v_total_max_sel,
+ s.v_total_min_sel,
s.h_blank_start,
s.h_blank_end,
s.h_sync_a_start,
@@ -186,10 +326,25 @@ void dcn10_log_hw_state(struct dc *dc)
s.h_total,
s.v_total,
s.underflow_occurred_status);
- DTN_INFO("\n");
+
+ // Clear underflow for debug purposes
+ // We want to keep underflow sticky bit on for the longevity tests outside of test environment.
+ // This function is called only from Windows or Diags test environment, hence it's safe to clear
+ // it from here without affecting the original intent.
+ tg->funcs->clear_optc_underflow(tg);
}
DTN_INFO("\n");
+ DTN_INFO("\nCALCULATED Clocks: dcfclk_khz:%d dcfclk_deep_sleep_khz:%d dispclk_khz:%d\n"
+ "dppclk_khz:%d max_supported_dppclk_khz:%d fclk_khz:%d socclk_khz:%d\n\n",
+ dc->current_state->bw.dcn.calc_clk.dcfclk_khz,
+ dc->current_state->bw.dcn.calc_clk.dcfclk_deep_sleep_khz,
+ dc->current_state->bw.dcn.calc_clk.dispclk_khz,
+ dc->current_state->bw.dcn.calc_clk.dppclk_khz,
+ dc->current_state->bw.dcn.calc_clk.max_supported_dppclk_khz,
+ dc->current_state->bw.dcn.calc_clk.fclk_khz,
+ dc->current_state->bw.dcn.calc_clk.socclk_khz);
+
log_mpc_crc(dc);
DTN_INFO_END();
@@ -354,7 +509,7 @@ static void power_on_plane(
struct dce_hwseq *hws,
int plane_id)
{
- struct dc_context *ctx = hws->ctx;
+ DC_LOGGER_INIT(hws->ctx->logger);
if (REG(DC_IP_REQUEST_CNTL)) {
REG_SET(DC_IP_REQUEST_CNTL, 0,
IP_REQUEST_EN, 1);
@@ -461,7 +616,7 @@ static void false_optc_underflow_wa(
tg->funcs->clear_optc_underflow(tg);
}
-static enum dc_status dcn10_prog_pixclk_crtc_otg(
+static enum dc_status dcn10_enable_stream_timing(
struct pipe_ctx *pipe_ctx,
struct dc_state *context,
struct dc *dc)
@@ -553,7 +708,7 @@ static void reset_back_end_for_pipe(
struct dc_state *context)
{
int i;
- struct dc_context *ctx = dc->ctx;
+ DC_LOGGER_INIT(dc->ctx->logger);
if (pipe_ctx->stream_res.stream_enc == NULL) {
pipe_ctx->stream = NULL;
return;
@@ -603,6 +758,90 @@ static void reset_back_end_for_pipe(
pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst);
}
+static bool dcn10_hw_wa_force_recovery(struct dc *dc)
+{
+ struct hubp *hubp ;
+ unsigned int i;
+ bool need_recover = true;
+
+ if (!dc->debug.recovery_enabled)
+ return false;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx =
+ &dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe_ctx != NULL) {
+ hubp = pipe_ctx->plane_res.hubp;
+ if (hubp != NULL) {
+ if (hubp->funcs->hubp_get_underflow_status(hubp) != 0) {
+ /* one pipe underflow, we will reset all the pipes*/
+ need_recover = true;
+ }
+ }
+ }
+ }
+ if (!need_recover)
+ return false;
+ /*
+ DCHUBP_CNTL:HUBP_BLANK_EN=1
+ DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=1
+ DCHUBP_CNTL:HUBP_DISABLE=1
+ DCHUBP_CNTL:HUBP_DISABLE=0
+ DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=0
+ DCSURF_PRIMARY_SURFACE_ADDRESS
+ DCHUBP_CNTL:HUBP_BLANK_EN=0
+ */
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx =
+ &dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe_ctx != NULL) {
+ hubp = pipe_ctx->plane_res.hubp;
+ /*DCHUBP_CNTL:HUBP_BLANK_EN=1*/
+ if (hubp != NULL)
+ hubp->funcs->set_hubp_blank_en(hubp, true);
+ }
+ }
+ /*DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=1*/
+ hubbub1_soft_reset(dc->res_pool->hubbub, true);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx =
+ &dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe_ctx != NULL) {
+ hubp = pipe_ctx->plane_res.hubp;
+ /*DCHUBP_CNTL:HUBP_DISABLE=1*/
+ if (hubp != NULL)
+ hubp->funcs->hubp_disable_control(hubp, true);
+ }
+ }
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx =
+ &dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe_ctx != NULL) {
+ hubp = pipe_ctx->plane_res.hubp;
+ /*DCHUBP_CNTL:HUBP_DISABLE=0*/
+ if (hubp != NULL)
+ hubp->funcs->hubp_disable_control(hubp, true);
+ }
+ }
+ /*DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=0*/
+ hubbub1_soft_reset(dc->res_pool->hubbub, false);
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx =
+ &dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe_ctx != NULL) {
+ hubp = pipe_ctx->plane_res.hubp;
+ /*DCHUBP_CNTL:HUBP_BLANK_EN=0*/
+ if (hubp != NULL)
+ hubp->funcs->set_hubp_blank_en(hubp, true);
+ }
+ }
+ return true;
+
+}
+
+
static void dcn10_verify_allow_pstate_change_high(struct dc *dc)
{
static bool should_log_hw_state; /* prevent hw state log by default */
@@ -611,13 +850,17 @@ static void dcn10_verify_allow_pstate_change_high(struct dc *dc)
if (should_log_hw_state) {
dcn10_log_hw_state(dc);
}
-
BREAK_TO_DEBUGGER();
+ if (dcn10_hw_wa_force_recovery(dc)) {
+ /*check again*/
+ if (!hubbub1_verify_allow_pstate_change_high(dc->res_pool->hubbub))
+ BREAK_TO_DEBUGGER();
+ }
}
}
/* trigger HW to start disconnect plane from stream on the next vsync */
-static void plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void hwss1_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
{
struct hubp *hubp = pipe_ctx->plane_res.hubp;
int dpp_id = pipe_ctx->plane_res.dpp->inst;
@@ -649,7 +892,7 @@ static void plane_atomic_power_down(struct dc *dc, struct pipe_ctx *pipe_ctx)
{
struct dce_hwseq *hws = dc->hwseq;
struct dpp *dpp = pipe_ctx->plane_res.dpp;
- struct dc_context *ctx = dc->ctx;
+ DC_LOGGER_INIT(dc->ctx->logger);
if (REG(DC_IP_REQUEST_CNTL)) {
REG_SET(DC_IP_REQUEST_CNTL, 0,
@@ -699,7 +942,7 @@ static void plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
{
- struct dc_context *ctx = dc->ctx;
+ DC_LOGGER_INIT(dc->ctx->logger);
if (!pipe_ctx->plane_res.hubp || pipe_ctx->plane_res.hubp->power_gated)
return;
@@ -800,7 +1043,7 @@ static void dcn10_init_hw(struct dc *dc)
dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
- plane_atomic_disconnect(dc, pipe_ctx);
+ hwss1_plane_atomic_disconnect(dc, pipe_ctx);
}
for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -945,9 +1188,8 @@ static bool dcn10_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
tf = plane_state->in_transfer_func;
if (plane_state->gamma_correction &&
- plane_state->gamma_correction->is_identity)
- dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_BYPASS);
- else if (plane_state->gamma_correction && dce_use_lut(plane_state->format))
+ !plane_state->gamma_correction->is_identity
+ && dce_use_lut(plane_state->format))
dpp_base->funcs->dpp_program_input_lut(dpp_base, plane_state->gamma_correction);
if (tf == NULL)
@@ -1433,7 +1675,7 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx,
}
}
-static void program_output_csc(struct dc *dc,
+static void dcn10_program_output_csc(struct dc *dc,
struct pipe_ctx *pipe_ctx,
enum dc_color_space colorspace,
uint16_t *matrix,
@@ -1542,22 +1784,22 @@ static uint16_t fixed_point_to_int_frac(
uint16_t result;
- uint16_t d = (uint16_t)dal_fixed31_32_floor(
- dal_fixed31_32_abs(
+ uint16_t d = (uint16_t)dc_fixpt_floor(
+ dc_fixpt_abs(
arg));
if (d <= (uint16_t)(1 << integer_bits) - (1 / (uint16_t)divisor))
- numerator = (uint16_t)dal_fixed31_32_floor(
- dal_fixed31_32_mul_int(
+ numerator = (uint16_t)dc_fixpt_floor(
+ dc_fixpt_mul_int(
arg,
divisor));
else {
- numerator = dal_fixed31_32_floor(
- dal_fixed31_32_sub(
- dal_fixed31_32_from_int(
+ numerator = dc_fixpt_floor(
+ dc_fixpt_sub(
+ dc_fixpt_from_int(
1LL << integer_bits),
- dal_fixed31_32_recip(
- dal_fixed31_32_from_int(
+ dc_fixpt_recip(
+ dc_fixpt_from_int(
divisor))));
}
@@ -1567,8 +1809,8 @@ static uint16_t fixed_point_to_int_frac(
result = (uint16_t)(
(1 << (integer_bits + fractional_bits + 1)) + numerator);
- if ((result != 0) && dal_fixed31_32_lt(
- arg, dal_fixed31_32_zero))
+ if ((result != 0) && dc_fixpt_lt(
+ arg, dc_fixpt_zero))
result |= 1 << (integer_bits + fractional_bits);
return result;
@@ -1582,8 +1824,8 @@ void build_prescale_params(struct dc_bias_and_scale *bias_and_scale,
&& plane_state->input_csc_color_matrix.enable_adjustment
&& plane_state->coeff_reduction_factor.value != 0) {
bias_and_scale->scale_blue = fixed_point_to_int_frac(
- dal_fixed31_32_mul(plane_state->coeff_reduction_factor,
- dal_fixed31_32_from_fraction(256, 255)),
+ dc_fixpt_mul(plane_state->coeff_reduction_factor,
+ dc_fixpt_from_fraction(256, 255)),
2,
13);
bias_and_scale->scale_red = bias_and_scale->scale_blue;
@@ -1623,6 +1865,8 @@ static void update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
struct mpc *mpc = dc->res_pool->mpc;
struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params);
+
+
/* TODO: proper fix once fpga works */
if (dc->debug.surface_visual_confirm)
@@ -1649,6 +1893,7 @@ static void update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
pipe_ctx->stream->output_color_space)
&& per_pixel_alpha;
+
/*
* TODO: remove hack
* Note: currently there is a bug in init_hw such that
@@ -1659,6 +1904,12 @@ static void update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
*/
mpcc_id = hubp->inst;
+ /* If there is no full update, don't need to touch MPC tree*/
+ if (!pipe_ctx->plane_state->update_flags.bits.full_update) {
+ mpc->funcs->update_blending(mpc, &blnd_cfg, mpcc_id);
+ return;
+ }
+
/* check if this MPCC is already being used */
new_mpcc = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, mpcc_id);
/* remove MPCC if being used */
@@ -1777,7 +2028,7 @@ static void update_dchubp_dpp(
/*gamut remap*/
program_gamut_remap(pipe_ctx);
- program_output_csc(dc,
+ dc->hwss.program_output_csc(dc,
pipe_ctx,
pipe_ctx->stream->output_color_space,
pipe_ctx->stream->csc_color_matrix.matrix,
@@ -1810,9 +2061,9 @@ static void update_dchubp_dpp(
hubp->funcs->set_blank(hubp, false);
}
-static void dcn10_otg_blank(
+static void dcn10_blank_pixel_data(
struct dc *dc,
- struct stream_resource stream_res,
+ struct stream_resource *stream_res,
struct dc_stream_state *stream,
bool blank)
{
@@ -1823,27 +2074,27 @@ static void dcn10_otg_blank(
color_space = stream->output_color_space;
color_space_to_black_color(dc, color_space, &black_color);
- if (stream_res.tg->funcs->set_blank_color)
- stream_res.tg->funcs->set_blank_color(
- stream_res.tg,
+ if (stream_res->tg->funcs->set_blank_color)
+ stream_res->tg->funcs->set_blank_color(
+ stream_res->tg,
&black_color);
if (!blank) {
- if (stream_res.tg->funcs->set_blank)
- stream_res.tg->funcs->set_blank(stream_res.tg, blank);
- if (stream_res.abm)
- stream_res.abm->funcs->set_abm_level(stream_res.abm, stream->abm_level);
+ if (stream_res->tg->funcs->set_blank)
+ stream_res->tg->funcs->set_blank(stream_res->tg, blank);
+ if (stream_res->abm)
+ stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level);
} else if (blank) {
- if (stream_res.abm)
- stream_res.abm->funcs->set_abm_immediate_disable(stream_res.abm);
- if (stream_res.tg->funcs->set_blank)
- stream_res.tg->funcs->set_blank(stream_res.tg, blank);
+ if (stream_res->abm)
+ stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm);
+ if (stream_res->tg->funcs->set_blank)
+ stream_res->tg->funcs->set_blank(stream_res->tg, blank);
}
}
static void set_hdr_multiplier(struct pipe_ctx *pipe_ctx)
{
- struct fixed31_32 multiplier = dal_fixed31_32_from_fraction(
+ struct fixed31_32 multiplier = dc_fixpt_from_fraction(
pipe_ctx->plane_state->sdr_white_level, 80);
uint32_t hw_mult = 0x1f000; // 1.0 default multiplier
struct custom_float_format fmt;
@@ -1876,7 +2127,7 @@ static void program_all_pipe_in_tree(
pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg);
- dcn10_otg_blank(dc, pipe_ctx->stream_res,
+ dc->hwss.blank_pixel_data(dc, &pipe_ctx->stream_res,
pipe_ctx->stream, blank);
}
@@ -1983,9 +2234,9 @@ static void dcn10_apply_ctx_for_surface(
bool removed_pipe[4] = { false };
unsigned int ref_clk_mhz = dc->res_pool->ref_clock_inKhz/1000;
bool program_water_mark = false;
- struct dc_context *ctx = dc->ctx;
struct pipe_ctx *top_pipe_to_program =
find_top_pipe_for_stream(dc, context, stream);
+ DC_LOGGER_INIT(dc->ctx->logger);
if (!top_pipe_to_program)
return;
@@ -1996,7 +2247,7 @@ static void dcn10_apply_ctx_for_surface(
if (num_planes == 0) {
/* OTG blank before remove all front end */
- dcn10_otg_blank(dc, top_pipe_to_program->stream_res, top_pipe_to_program->stream, true);
+ dc->hwss.blank_pixel_data(dc, &top_pipe_to_program->stream_res, top_pipe_to_program->stream, true);
}
/* Disconnect unused mpcc */
@@ -2027,7 +2278,7 @@ static void dcn10_apply_ctx_for_surface(
old_pipe_ctx->plane_state &&
old_pipe_ctx->stream_res.tg == tg) {
- plane_atomic_disconnect(dc, old_pipe_ctx);
+ hwss1_plane_atomic_disconnect(dc, old_pipe_ctx);
removed_pipe[i] = true;
DC_LOG_DC(
@@ -2335,15 +2586,6 @@ static void set_static_screen_control(struct pipe_ctx **pipe_ctx,
set_static_screen_control(pipe_ctx[i]->stream_res.tg, value);
}
-static void set_plane_config(
- const struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct resource_context *res_ctx)
-{
- /* TODO */
- program_gamut_remap(pipe_ctx);
-}
-
static void dcn10_config_stereo_parameters(
struct dc_stream_state *stream, struct crtc_stereo_flags *flags)
{
@@ -2521,12 +2763,12 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
.init_hw = dcn10_init_hw,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = dcn10_apply_ctx_for_surface,
- .set_plane_config = set_plane_config,
.update_plane_addr = dcn10_update_plane_addr,
.update_dchub = dcn10_update_dchub,
.update_pending_status = dcn10_update_pending_status,
.set_input_transfer_func = dcn10_set_input_transfer_func,
.set_output_transfer_func = dcn10_set_output_transfer_func,
+ .program_output_csc = dcn10_program_output_csc,
.power_down = dce110_power_down,
.enable_accelerated_mode = dce110_enable_accelerated_mode,
.enable_timing_synchronization = dcn10_enable_timing_synchronization,
@@ -2538,10 +2780,11 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
.blank_stream = dce110_blank_stream,
.enable_display_power_gating = dcn10_dummy_display_power_gating,
.disable_plane = dcn10_disable_plane,
+ .blank_pixel_data = dcn10_blank_pixel_data,
.pipe_control_lock = dcn10_pipe_control_lock,
.set_bandwidth = dcn10_set_bandwidth,
.reset_hw_ctx_wrap = reset_hw_ctx_wrap,
- .prog_pixclk_crtc_otg = dcn10_prog_pixclk_crtc_otg,
+ .enable_stream_timing = dcn10_enable_stream_timing,
.set_drr = set_drr,
.get_position = get_position,
.set_static_screen_control = set_static_screen_control,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
index 6c526b5095d9..44f734b73f9e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
@@ -37,4 +37,6 @@ extern void fill_display_configs(
bool is_rgb_cspace(enum dc_color_space output_color_space);
+void hwss1_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
+
#endif /* __DC_HWSS_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
new file mode 100644
index 000000000000..21fa40ac0786
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
@@ -0,0 +1,1362 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+
+#include "core_types.h"
+#include "link_encoder.h"
+#include "dcn10_link_encoder.h"
+#include "stream_encoder.h"
+#include "i2caux_interface.h"
+#include "dc_bios_types.h"
+
+#include "gpio_service_interface.h"
+
+#define CTX \
+ enc10->base.ctx
+#define DC_LOGGER \
+ enc10->base.ctx->logger
+
+#define REG(reg)\
+ (enc10->link_regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ enc10->link_shift->field_name, enc10->link_mask->field_name
+
+
+/*
+ * @brief
+ * Trigger Source Select
+ * ASIC-dependent, actual values for register programming
+ */
+#define DCN10_DIG_FE_SOURCE_SELECT_INVALID 0x0
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGA 0x1
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGB 0x2
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGC 0x4
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGD 0x08
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGE 0x10
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGF 0x20
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGG 0x40
+
+enum {
+ DP_MST_UPDATE_MAX_RETRY = 50
+};
+
+
+
+static void aux_initialize(struct dcn10_link_encoder *enc10);
+
+
+static const struct link_encoder_funcs dcn10_lnk_enc_funcs = {
+ .validate_output_with_stream =
+ dcn10_link_encoder_validate_output_with_stream,
+ .hw_init = dcn10_link_encoder_hw_init,
+ .setup = dcn10_link_encoder_setup,
+ .enable_tmds_output = dcn10_link_encoder_enable_tmds_output,
+ .enable_dp_output = dcn10_link_encoder_enable_dp_output,
+ .enable_dp_mst_output = dcn10_link_encoder_enable_dp_mst_output,
+ .disable_output = dcn10_link_encoder_disable_output,
+ .dp_set_lane_settings = dcn10_link_encoder_dp_set_lane_settings,
+ .dp_set_phy_pattern = dcn10_link_encoder_dp_set_phy_pattern,
+ .update_mst_stream_allocation_table =
+ dcn10_link_encoder_update_mst_stream_allocation_table,
+ .psr_program_dp_dphy_fast_training =
+ dcn10_psr_program_dp_dphy_fast_training,
+ .psr_program_secondary_packet = dcn10_psr_program_secondary_packet,
+ .connect_dig_be_to_fe = dcn10_link_encoder_connect_dig_be_to_fe,
+ .enable_hpd = dcn10_link_encoder_enable_hpd,
+ .disable_hpd = dcn10_link_encoder_disable_hpd,
+ .is_dig_enabled = dcn10_is_dig_enabled,
+ .destroy = dcn10_link_encoder_destroy
+};
+
+static enum bp_result link_transmitter_control(
+ struct dcn10_link_encoder *enc10,
+ struct bp_transmitter_control *cntl)
+{
+ enum bp_result result;
+ struct dc_bios *bp = enc10->base.ctx->dc_bios;
+
+ result = bp->funcs->transmitter_control(bp, cntl);
+
+ return result;
+}
+
+static void enable_phy_bypass_mode(
+ struct dcn10_link_encoder *enc10,
+ bool enable)
+{
+ /* This register resides in DP back end block;
+ * transmitter is used for the offset
+ */
+ REG_UPDATE(DP_DPHY_CNTL, DPHY_BYPASS, enable);
+
+}
+
+static void disable_prbs_symbols(
+ struct dcn10_link_encoder *enc10,
+ bool disable)
+{
+ /* This register resides in DP back end block;
+ * transmitter is used for the offset
+ */
+ REG_UPDATE_4(DP_DPHY_CNTL,
+ DPHY_ATEST_SEL_LANE0, disable,
+ DPHY_ATEST_SEL_LANE1, disable,
+ DPHY_ATEST_SEL_LANE2, disable,
+ DPHY_ATEST_SEL_LANE3, disable);
+}
+
+static void disable_prbs_mode(
+ struct dcn10_link_encoder *enc10)
+{
+ REG_UPDATE(DP_DPHY_PRBS_CNTL, DPHY_PRBS_EN, 0);
+}
+
+static void program_pattern_symbols(
+ struct dcn10_link_encoder *enc10,
+ uint16_t pattern_symbols[8])
+{
+ /* This register resides in DP back end block;
+ * transmitter is used for the offset
+ */
+ REG_SET_3(DP_DPHY_SYM0, 0,
+ DPHY_SYM1, pattern_symbols[0],
+ DPHY_SYM2, pattern_symbols[1],
+ DPHY_SYM3, pattern_symbols[2]);
+
+ /* This register resides in DP back end block;
+ * transmitter is used for the offset
+ */
+ REG_SET_3(DP_DPHY_SYM1, 0,
+ DPHY_SYM4, pattern_symbols[3],
+ DPHY_SYM5, pattern_symbols[4],
+ DPHY_SYM6, pattern_symbols[5]);
+
+ /* This register resides in DP back end block;
+ * transmitter is used for the offset
+ */
+ REG_SET_2(DP_DPHY_SYM2, 0,
+ DPHY_SYM7, pattern_symbols[6],
+ DPHY_SYM8, pattern_symbols[7]);
+}
+
+static void set_dp_phy_pattern_d102(
+ struct dcn10_link_encoder *enc10)
+{
+ /* Disable PHY Bypass mode to setup the test pattern */
+ enable_phy_bypass_mode(enc10, false);
+
+ /* For 10-bit PRBS or debug symbols
+ * please use the following sequence:
+ *
+ * Enable debug symbols on the lanes
+ */
+ disable_prbs_symbols(enc10, true);
+
+ /* Disable PRBS mode */
+ disable_prbs_mode(enc10);
+
+ /* Program debug symbols to be output */
+ {
+ uint16_t pattern_symbols[8] = {
+ 0x2AA, 0x2AA, 0x2AA, 0x2AA,
+ 0x2AA, 0x2AA, 0x2AA, 0x2AA
+ };
+
+ program_pattern_symbols(enc10, pattern_symbols);
+ }
+
+ /* Enable phy bypass mode to enable the test pattern */
+
+ enable_phy_bypass_mode(enc10, true);
+}
+
+static void set_link_training_complete(
+ struct dcn10_link_encoder *enc10,
+ bool complete)
+{
+ /* This register resides in DP back end block;
+ * transmitter is used for the offset
+ */
+ REG_UPDATE(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, complete);
+
+}
+
+void dcn10_link_encoder_set_dp_phy_pattern_training_pattern(
+ struct link_encoder *enc,
+ uint32_t index)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ /* Write Training Pattern */
+
+ REG_WRITE(DP_DPHY_TRAINING_PATTERN_SEL, index);
+
+ /* Set HW Register Training Complete to false */
+
+ set_link_training_complete(enc10, false);
+
+ /* Disable PHY Bypass mode to output Training Pattern */
+
+ enable_phy_bypass_mode(enc10, false);
+
+ /* Disable PRBS mode */
+ disable_prbs_mode(enc10);
+}
+
+static void setup_panel_mode(
+ struct dcn10_link_encoder *enc10,
+ enum dp_panel_mode panel_mode)
+{
+ uint32_t value;
+
+ ASSERT(REG(DP_DPHY_INTERNAL_CTRL));
+ value = REG_READ(DP_DPHY_INTERNAL_CTRL);
+
+ switch (panel_mode) {
+ case DP_PANEL_MODE_EDP:
+ value = 0x1;
+ break;
+ case DP_PANEL_MODE_SPECIAL:
+ value = 0x11;
+ break;
+ default:
+ value = 0x0;
+ break;
+ }
+
+ REG_WRITE(DP_DPHY_INTERNAL_CTRL, value);
+}
+
+static void set_dp_phy_pattern_symbol_error(
+ struct dcn10_link_encoder *enc10)
+{
+ /* Disable PHY Bypass mode to setup the test pattern */
+ enable_phy_bypass_mode(enc10, false);
+
+ /* program correct panel mode*/
+ setup_panel_mode(enc10, DP_PANEL_MODE_DEFAULT);
+
+ /* A PRBS23 pattern is used for most DP electrical measurements. */
+
+ /* Enable PRBS symbols on the lanes */
+ disable_prbs_symbols(enc10, false);
+
+ /* For PRBS23 Set bit DPHY_PRBS_SEL=1 and Set bit DPHY_PRBS_EN=1 */
+ REG_UPDATE_2(DP_DPHY_PRBS_CNTL,
+ DPHY_PRBS_SEL, 1,
+ DPHY_PRBS_EN, 1);
+
+ /* Enable phy bypass mode to enable the test pattern */
+ enable_phy_bypass_mode(enc10, true);
+}
+
+static void set_dp_phy_pattern_prbs7(
+ struct dcn10_link_encoder *enc10)
+{
+ /* Disable PHY Bypass mode to setup the test pattern */
+ enable_phy_bypass_mode(enc10, false);
+
+ /* A PRBS7 pattern is used for most DP electrical measurements. */
+
+ /* Enable PRBS symbols on the lanes */
+ disable_prbs_symbols(enc10, false);
+
+ /* For PRBS7 Set bit DPHY_PRBS_SEL=0 and Set bit DPHY_PRBS_EN=1 */
+ REG_UPDATE_2(DP_DPHY_PRBS_CNTL,
+ DPHY_PRBS_SEL, 0,
+ DPHY_PRBS_EN, 1);
+
+ /* Enable phy bypass mode to enable the test pattern */
+ enable_phy_bypass_mode(enc10, true);
+}
+
+static void set_dp_phy_pattern_80bit_custom(
+ struct dcn10_link_encoder *enc10,
+ const uint8_t *pattern)
+{
+ /* Disable PHY Bypass mode to setup the test pattern */
+ enable_phy_bypass_mode(enc10, false);
+
+ /* Enable debug symbols on the lanes */
+
+ disable_prbs_symbols(enc10, true);
+
+ /* Enable PHY bypass mode to enable the test pattern */
+ /* TODO is it really needed ? */
+
+ enable_phy_bypass_mode(enc10, true);
+
+ /* Program 80 bit custom pattern */
+ {
+ uint16_t pattern_symbols[8];
+
+ pattern_symbols[0] =
+ ((pattern[1] & 0x03) << 8) | pattern[0];
+ pattern_symbols[1] =
+ ((pattern[2] & 0x0f) << 6) | ((pattern[1] >> 2) & 0x3f);
+ pattern_symbols[2] =
+ ((pattern[3] & 0x3f) << 4) | ((pattern[2] >> 4) & 0x0f);
+ pattern_symbols[3] =
+ (pattern[4] << 2) | ((pattern[3] >> 6) & 0x03);
+ pattern_symbols[4] =
+ ((pattern[6] & 0x03) << 8) | pattern[5];
+ pattern_symbols[5] =
+ ((pattern[7] & 0x0f) << 6) | ((pattern[6] >> 2) & 0x3f);
+ pattern_symbols[6] =
+ ((pattern[8] & 0x3f) << 4) | ((pattern[7] >> 4) & 0x0f);
+ pattern_symbols[7] =
+ (pattern[9] << 2) | ((pattern[8] >> 6) & 0x03);
+
+ program_pattern_symbols(enc10, pattern_symbols);
+ }
+
+ /* Enable phy bypass mode to enable the test pattern */
+
+ enable_phy_bypass_mode(enc10, true);
+}
+
+static void set_dp_phy_pattern_hbr2_compliance_cp2520_2(
+ struct dcn10_link_encoder *enc10,
+ unsigned int cp2520_pattern)
+{
+
+ /* previously there is a register DP_HBR2_EYE_PATTERN
+ * that is enabled to get the pattern.
+ * But it does not work with the latest spec change,
+ * so we are programming the following registers manually.
+ *
+ * The following settings have been confirmed
+ * by Nick Chorney and Sandra Liu
+ */
+
+ /* Disable PHY Bypass mode to setup the test pattern */
+
+ enable_phy_bypass_mode(enc10, false);
+
+ /* Setup DIG encoder in DP SST mode */
+ enc10->base.funcs->setup(&enc10->base, SIGNAL_TYPE_DISPLAY_PORT);
+
+ /* ensure normal panel mode. */
+ setup_panel_mode(enc10, DP_PANEL_MODE_DEFAULT);
+
+ /* no vbid after BS (SR)
+ * DP_LINK_FRAMING_CNTL changed history Sandra Liu
+ * 11000260 / 11000104 / 110000FC
+ */
+ REG_UPDATE_3(DP_LINK_FRAMING_CNTL,
+ DP_IDLE_BS_INTERVAL, 0xFC,
+ DP_VBID_DISABLE, 1,
+ DP_VID_ENHANCED_FRAME_MODE, 1);
+
+ /* swap every BS with SR */
+ REG_UPDATE(DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, 0);
+
+ /* select cp2520 patterns */
+ if (REG(DP_DPHY_HBR2_PATTERN_CONTROL))
+ REG_UPDATE(DP_DPHY_HBR2_PATTERN_CONTROL,
+ DP_DPHY_HBR2_PATTERN_CONTROL, cp2520_pattern);
+ else
+ /* pre-DCE11 can only generate CP2520 pattern 2 */
+ ASSERT(cp2520_pattern == 2);
+
+ /* set link training complete */
+ set_link_training_complete(enc10, true);
+
+ /* disable video stream */
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0);
+
+ /* Disable PHY Bypass mode to setup the test pattern */
+ enable_phy_bypass_mode(enc10, false);
+}
+
+static void set_dp_phy_pattern_passthrough_mode(
+ struct dcn10_link_encoder *enc10,
+ enum dp_panel_mode panel_mode)
+{
+ /* program correct panel mode */
+ setup_panel_mode(enc10, panel_mode);
+
+ /* restore LINK_FRAMING_CNTL and DPHY_SCRAMBLER_BS_COUNT
+ * in case we were doing HBR2 compliance pattern before
+ */
+ REG_UPDATE_3(DP_LINK_FRAMING_CNTL,
+ DP_IDLE_BS_INTERVAL, 0x2000,
+ DP_VBID_DISABLE, 0,
+ DP_VID_ENHANCED_FRAME_MODE, 1);
+
+ REG_UPDATE(DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, 0x1FF);
+
+ /* set link training complete */
+ set_link_training_complete(enc10, true);
+
+ /* Disable PHY Bypass mode to setup the test pattern */
+ enable_phy_bypass_mode(enc10, false);
+
+ /* Disable PRBS mode */
+ disable_prbs_mode(enc10);
+}
+
+/* return value is bit-vector */
+static uint8_t get_frontend_source(
+ enum engine_id engine)
+{
+ switch (engine) {
+ case ENGINE_ID_DIGA:
+ return DCN10_DIG_FE_SOURCE_SELECT_DIGA;
+ case ENGINE_ID_DIGB:
+ return DCN10_DIG_FE_SOURCE_SELECT_DIGB;
+ case ENGINE_ID_DIGC:
+ return DCN10_DIG_FE_SOURCE_SELECT_DIGC;
+ case ENGINE_ID_DIGD:
+ return DCN10_DIG_FE_SOURCE_SELECT_DIGD;
+ case ENGINE_ID_DIGE:
+ return DCN10_DIG_FE_SOURCE_SELECT_DIGE;
+ case ENGINE_ID_DIGF:
+ return DCN10_DIG_FE_SOURCE_SELECT_DIGF;
+ case ENGINE_ID_DIGG:
+ return DCN10_DIG_FE_SOURCE_SELECT_DIGG;
+ default:
+ ASSERT_CRITICAL(false);
+ return DCN10_DIG_FE_SOURCE_SELECT_INVALID;
+ }
+}
+
+static void configure_encoder(
+ struct dcn10_link_encoder *enc10,
+ const struct dc_link_settings *link_settings)
+{
+ /* set number of lanes */
+
+ REG_SET(DP_CONFIG, 0,
+ DP_UDI_LANES, link_settings->lane_count - LANE_COUNT_ONE);
+
+ /* setup scrambler */
+ REG_UPDATE(DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_ADVANCE, 1);
+}
+
+void dcn10_psr_program_dp_dphy_fast_training(struct link_encoder *enc,
+ bool exit_link_training_required)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+ if (exit_link_training_required)
+ REG_UPDATE(DP_DPHY_FAST_TRAINING,
+ DPHY_RX_FAST_TRAINING_CAPABLE, 1);
+ else {
+ REG_UPDATE(DP_DPHY_FAST_TRAINING,
+ DPHY_RX_FAST_TRAINING_CAPABLE, 0);
+ /*In DCE 11, we are able to pre-program a Force SR register
+ * to be able to trigger SR symbol after 5 idle patterns
+ * transmitted. Upon PSR Exit, DMCU can trigger
+ * DPHY_LOAD_BS_COUNT_START = 1. Upon writing 1 to
+ * DPHY_LOAD_BS_COUNT_START and the internal counter
+ * reaches DPHY_LOAD_BS_COUNT, the next BS symbol will be
+ * replaced by SR symbol once.
+ */
+
+ REG_UPDATE(DP_DPHY_BS_SR_SWAP_CNTL, DPHY_LOAD_BS_COUNT, 0x5);
+ }
+}
+
+void dcn10_psr_program_secondary_packet(struct link_encoder *enc,
+ unsigned int sdp_transmit_line_num_deadline)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+ REG_UPDATE_2(DP_SEC_CNTL1,
+ DP_SEC_GSP0_LINE_NUM, sdp_transmit_line_num_deadline,
+ DP_SEC_GSP0_PRIORITY, 1);
+}
+
+bool dcn10_is_dig_enabled(struct link_encoder *enc)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ uint32_t value;
+
+ REG_GET(DIG_BE_EN_CNTL, DIG_ENABLE, &value);
+ return value;
+}
+
+static void link_encoder_disable(struct dcn10_link_encoder *enc10)
+{
+ /* reset training pattern */
+ REG_SET(DP_DPHY_TRAINING_PATTERN_SEL, 0,
+ DPHY_TRAINING_PATTERN_SEL, 0);
+
+ /* reset training complete */
+ REG_UPDATE(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, 0);
+
+ /* reset panel mode */
+ setup_panel_mode(enc10, DP_PANEL_MODE_DEFAULT);
+}
+
+static void hpd_initialize(
+ struct dcn10_link_encoder *enc10)
+{
+ /* Associate HPD with DIG_BE */
+ enum hpd_source_id hpd_source = enc10->base.hpd_source;
+
+ REG_UPDATE(DIG_BE_CNTL, DIG_HPD_SELECT, hpd_source);
+}
+
+bool dcn10_link_encoder_validate_dvi_output(
+ const struct dcn10_link_encoder *enc10,
+ enum signal_type connector_signal,
+ enum signal_type signal,
+ const struct dc_crtc_timing *crtc_timing)
+{
+ uint32_t max_pixel_clock = TMDS_MAX_PIXEL_CLOCK;
+
+ if (signal == SIGNAL_TYPE_DVI_DUAL_LINK)
+ max_pixel_clock *= 2;
+
+ /* This handles the case of HDMI downgrade to DVI we don't want to
+ * we don't want to cap the pixel clock if the DDI is not DVI.
+ */
+ if (connector_signal != SIGNAL_TYPE_DVI_DUAL_LINK &&
+ connector_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)
+ max_pixel_clock = enc10->base.features.max_hdmi_pixel_clock;
+
+ /* DVI only support RGB pixel encoding */
+ if (crtc_timing->pixel_encoding != PIXEL_ENCODING_RGB)
+ return false;
+
+ /*connect DVI via adpater's HDMI connector*/
+ if ((connector_signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
+ connector_signal == SIGNAL_TYPE_HDMI_TYPE_A) &&
+ signal != SIGNAL_TYPE_HDMI_TYPE_A &&
+ crtc_timing->pix_clk_khz > TMDS_MAX_PIXEL_CLOCK)
+ return false;
+ if (crtc_timing->pix_clk_khz < TMDS_MIN_PIXEL_CLOCK)
+ return false;
+
+ if (crtc_timing->pix_clk_khz > max_pixel_clock)
+ return false;
+
+ /* DVI supports 6/8bpp single-link and 10/16bpp dual-link */
+ switch (crtc_timing->display_color_depth) {
+ case COLOR_DEPTH_666:
+ case COLOR_DEPTH_888:
+ break;
+ case COLOR_DEPTH_101010:
+ case COLOR_DEPTH_161616:
+ if (signal != SIGNAL_TYPE_DVI_DUAL_LINK)
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+static bool dcn10_link_encoder_validate_hdmi_output(
+ const struct dcn10_link_encoder *enc10,
+ const struct dc_crtc_timing *crtc_timing,
+ int adjusted_pix_clk_khz)
+{
+ enum dc_color_depth max_deep_color =
+ enc10->base.features.max_hdmi_deep_color;
+
+ if (max_deep_color < crtc_timing->display_color_depth)
+ return false;
+
+ if (crtc_timing->display_color_depth < COLOR_DEPTH_888)
+ return false;
+ if (adjusted_pix_clk_khz < TMDS_MIN_PIXEL_CLOCK)
+ return false;
+
+ if ((adjusted_pix_clk_khz == 0) ||
+ (adjusted_pix_clk_khz > enc10->base.features.max_hdmi_pixel_clock))
+ return false;
+
+ /* DCE11 HW does not support 420 */
+ if (!enc10->base.features.ycbcr420_supported &&
+ crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
+ return false;
+
+ if (!enc10->base.features.flags.bits.HDMI_6GB_EN &&
+ adjusted_pix_clk_khz >= 300000)
+ return false;
+ return true;
+}
+
+bool dcn10_link_encoder_validate_dp_output(
+ const struct dcn10_link_encoder *enc10,
+ const struct dc_crtc_timing *crtc_timing)
+{
+ /* default RGB only */
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB)
+ return true;
+
+ if (enc10->base.features.flags.bits.IS_YCBCR_CAPABLE)
+ return true;
+
+ /* for DCE 8.x or later DP Y-only feature,
+ * we need ASIC cap + FeatureSupportDPYonly, not support 666
+ */
+ if (crtc_timing->flags.Y_ONLY &&
+ enc10->base.features.flags.bits.IS_YCBCR_CAPABLE &&
+ crtc_timing->display_color_depth != COLOR_DEPTH_666)
+ return true;
+
+ return false;
+}
+
+void dcn10_link_encoder_construct(
+ struct dcn10_link_encoder *enc10,
+ const struct encoder_init_data *init_data,
+ const struct encoder_feature_support *enc_features,
+ const struct dcn10_link_enc_registers *link_regs,
+ const struct dcn10_link_enc_aux_registers *aux_regs,
+ const struct dcn10_link_enc_hpd_registers *hpd_regs,
+ const struct dcn10_link_enc_shift *link_shift,
+ const struct dcn10_link_enc_mask *link_mask)
+{
+ struct bp_encoder_cap_info bp_cap_info = {0};
+ const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs;
+ enum bp_result result = BP_RESULT_OK;
+
+ enc10->base.funcs = &dcn10_lnk_enc_funcs;
+ enc10->base.ctx = init_data->ctx;
+ enc10->base.id = init_data->encoder;
+
+ enc10->base.hpd_source = init_data->hpd_source;
+ enc10->base.connector = init_data->connector;
+
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+
+ enc10->base.features = *enc_features;
+
+ enc10->base.transmitter = init_data->transmitter;
+
+ /* set the flag to indicate whether driver poll the I2C data pin
+ * while doing the DP sink detect
+ */
+
+/* if (dal_adapter_service_is_feature_supported(as,
+ FEATURE_DP_SINK_DETECT_POLL_DATA_PIN))
+ enc10->base.features.flags.bits.
+ DP_SINK_DETECT_POLL_DATA_PIN = true;*/
+
+ enc10->base.output_signals =
+ SIGNAL_TYPE_DVI_SINGLE_LINK |
+ SIGNAL_TYPE_DVI_DUAL_LINK |
+ SIGNAL_TYPE_LVDS |
+ SIGNAL_TYPE_DISPLAY_PORT |
+ SIGNAL_TYPE_DISPLAY_PORT_MST |
+ SIGNAL_TYPE_EDP |
+ SIGNAL_TYPE_HDMI_TYPE_A;
+
+ /* For DCE 8.0 and 8.1, by design, UNIPHY is hardwired to DIG_BE.
+ * SW always assign DIG_FE 1:1 mapped to DIG_FE for non-MST UNIPHY.
+ * SW assign DIG_FE to non-MST UNIPHY first and MST last. So prefer
+ * DIG is per UNIPHY and used by SST DP, eDP, HDMI, DVI and LVDS.
+ * Prefer DIG assignment is decided by board design.
+ * For DCE 8.0, there are only max 6 UNIPHYs, we assume board design
+ * and VBIOS will filter out 7 UNIPHY for DCE 8.0.
+ * By this, adding DIGG should not hurt DCE 8.0.
+ * This will let DCE 8.1 share DCE 8.0 as much as possible
+ */
+
+ enc10->link_regs = link_regs;
+ enc10->aux_regs = aux_regs;
+ enc10->hpd_regs = hpd_regs;
+ enc10->link_shift = link_shift;
+ enc10->link_mask = link_mask;
+
+ switch (enc10->base.transmitter) {
+ case TRANSMITTER_UNIPHY_A:
+ enc10->base.preferred_engine = ENGINE_ID_DIGA;
+ break;
+ case TRANSMITTER_UNIPHY_B:
+ enc10->base.preferred_engine = ENGINE_ID_DIGB;
+ break;
+ case TRANSMITTER_UNIPHY_C:
+ enc10->base.preferred_engine = ENGINE_ID_DIGC;
+ break;
+ case TRANSMITTER_UNIPHY_D:
+ enc10->base.preferred_engine = ENGINE_ID_DIGD;
+ break;
+ case TRANSMITTER_UNIPHY_E:
+ enc10->base.preferred_engine = ENGINE_ID_DIGE;
+ break;
+ case TRANSMITTER_UNIPHY_F:
+ enc10->base.preferred_engine = ENGINE_ID_DIGF;
+ break;
+ case TRANSMITTER_UNIPHY_G:
+ enc10->base.preferred_engine = ENGINE_ID_DIGG;
+ break;
+ default:
+ ASSERT_CRITICAL(false);
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+ }
+
+ /* default to one to mirror Windows behavior */
+ enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
+
+ result = bp_funcs->get_encoder_cap_info(enc10->base.ctx->dc_bios,
+ enc10->base.id, &bp_cap_info);
+
+ /* Override features with DCE-specific values */
+ if (result == BP_RESULT_OK) {
+ enc10->base.features.flags.bits.IS_HBR2_CAPABLE =
+ bp_cap_info.DP_HBR2_EN;
+ enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
+ bp_cap_info.DP_HBR3_EN;
+ enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+ } else {
+ DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
+ __func__,
+ result);
+ }
+}
+
+bool dcn10_link_encoder_validate_output_with_stream(
+ struct link_encoder *enc,
+ const struct dc_stream_state *stream)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ bool is_valid;
+
+ switch (stream->signal) {
+ case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ case SIGNAL_TYPE_DVI_DUAL_LINK:
+ is_valid = dcn10_link_encoder_validate_dvi_output(
+ enc10,
+ stream->sink->link->connector_signal,
+ stream->signal,
+ &stream->timing);
+ break;
+ case SIGNAL_TYPE_HDMI_TYPE_A:
+ is_valid = dcn10_link_encoder_validate_hdmi_output(
+ enc10,
+ &stream->timing,
+ stream->phy_pix_clk);
+ break;
+ case SIGNAL_TYPE_DISPLAY_PORT:
+ case SIGNAL_TYPE_DISPLAY_PORT_MST:
+ is_valid = dcn10_link_encoder_validate_dp_output(
+ enc10, &stream->timing);
+ break;
+ case SIGNAL_TYPE_EDP:
+ is_valid = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ? true : false;
+ break;
+ case SIGNAL_TYPE_VIRTUAL:
+ is_valid = true;
+ break;
+ default:
+ is_valid = false;
+ break;
+ }
+
+ return is_valid;
+}
+
+void dcn10_link_encoder_hw_init(
+ struct link_encoder *enc)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ struct bp_transmitter_control cntl = { 0 };
+ enum bp_result result;
+
+ cntl.action = TRANSMITTER_CONTROL_INIT;
+ cntl.engine_id = ENGINE_ID_UNKNOWN;
+ cntl.transmitter = enc10->base.transmitter;
+ cntl.connector_obj_id = enc10->base.connector;
+ cntl.lanes_number = LANE_COUNT_FOUR;
+ cntl.coherent = false;
+ cntl.hpd_sel = enc10->base.hpd_source;
+
+ if (enc10->base.connector.id == CONNECTOR_ID_EDP)
+ cntl.signal = SIGNAL_TYPE_EDP;
+
+ result = link_transmitter_control(enc10, &cntl);
+
+ if (result != BP_RESULT_OK) {
+ DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n",
+ __func__);
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ if (enc10->base.connector.id == CONNECTOR_ID_LVDS) {
+ cntl.action = TRANSMITTER_CONTROL_BACKLIGHT_BRIGHTNESS;
+
+ result = link_transmitter_control(enc10, &cntl);
+
+ ASSERT(result == BP_RESULT_OK);
+
+ }
+ aux_initialize(enc10);
+
+ /* reinitialize HPD.
+ * hpd_initialize() will pass DIG_FE id to HW context.
+ * All other routine within HW context will use fe_engine_offset
+ * as DIG_FE id even caller pass DIG_FE id.
+ * So this routine must be called first.
+ */
+ hpd_initialize(enc10);
+}
+
+void dcn10_link_encoder_destroy(struct link_encoder **enc)
+{
+ kfree(TO_DCN10_LINK_ENC(*enc));
+ *enc = NULL;
+}
+
+void dcn10_link_encoder_setup(
+ struct link_encoder *enc,
+ enum signal_type signal)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+ switch (signal) {
+ case SIGNAL_TYPE_EDP:
+ case SIGNAL_TYPE_DISPLAY_PORT:
+ /* DP SST */
+ REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 0);
+ break;
+ case SIGNAL_TYPE_LVDS:
+ /* LVDS */
+ REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 1);
+ break;
+ case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ case SIGNAL_TYPE_DVI_DUAL_LINK:
+ /* TMDS-DVI */
+ REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 2);
+ break;
+ case SIGNAL_TYPE_HDMI_TYPE_A:
+ /* TMDS-HDMI */
+ REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 3);
+ break;
+ case SIGNAL_TYPE_DISPLAY_PORT_MST:
+ /* DP MST */
+ REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 5);
+ break;
+ default:
+ ASSERT_CRITICAL(false);
+ /* invalid mode ! */
+ break;
+ }
+
+}
+
+/* TODO: still need depth or just pass in adjusted pixel clock? */
+void dcn10_link_encoder_enable_tmds_output(
+ struct link_encoder *enc,
+ enum clock_source_id clock_source,
+ enum dc_color_depth color_depth,
+ enum signal_type signal,
+ uint32_t pixel_clock)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ struct bp_transmitter_control cntl = { 0 };
+ enum bp_result result;
+
+ /* Enable the PHY */
+
+ cntl.action = TRANSMITTER_CONTROL_ENABLE;
+ cntl.engine_id = enc->preferred_engine;
+ cntl.transmitter = enc10->base.transmitter;
+ cntl.pll_id = clock_source;
+ cntl.signal = signal;
+ if (cntl.signal == SIGNAL_TYPE_DVI_DUAL_LINK)
+ cntl.lanes_number = 8;
+ else
+ cntl.lanes_number = 4;
+
+ cntl.hpd_sel = enc10->base.hpd_source;
+
+ cntl.pixel_clock = pixel_clock;
+ cntl.color_depth = color_depth;
+
+ result = link_transmitter_control(enc10, &cntl);
+
+ if (result != BP_RESULT_OK) {
+ DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n",
+ __func__);
+ BREAK_TO_DEBUGGER();
+ }
+}
+
+/* enables DP PHY output */
+void dcn10_link_encoder_enable_dp_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ enum clock_source_id clock_source)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ struct bp_transmitter_control cntl = { 0 };
+ enum bp_result result;
+
+ /* Enable the PHY */
+
+ /* number_of_lanes is used for pixel clock adjust,
+ * but it's not passed to asic_control.
+ * We need to set number of lanes manually.
+ */
+ configure_encoder(enc10, link_settings);
+
+ cntl.action = TRANSMITTER_CONTROL_ENABLE;
+ cntl.engine_id = enc->preferred_engine;
+ cntl.transmitter = enc10->base.transmitter;
+ cntl.pll_id = clock_source;
+ cntl.signal = SIGNAL_TYPE_DISPLAY_PORT;
+ cntl.lanes_number = link_settings->lane_count;
+ cntl.hpd_sel = enc10->base.hpd_source;
+ cntl.pixel_clock = link_settings->link_rate
+ * LINK_RATE_REF_FREQ_IN_KHZ;
+ /* TODO: check if undefined works */
+ cntl.color_depth = COLOR_DEPTH_UNDEFINED;
+
+ result = link_transmitter_control(enc10, &cntl);
+
+ if (result != BP_RESULT_OK) {
+ DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n",
+ __func__);
+ BREAK_TO_DEBUGGER();
+ }
+}
+
+/* enables DP PHY output in MST mode */
+void dcn10_link_encoder_enable_dp_mst_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ enum clock_source_id clock_source)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ struct bp_transmitter_control cntl = { 0 };
+ enum bp_result result;
+
+ /* Enable the PHY */
+
+ /* number_of_lanes is used for pixel clock adjust,
+ * but it's not passed to asic_control.
+ * We need to set number of lanes manually.
+ */
+ configure_encoder(enc10, link_settings);
+
+ cntl.action = TRANSMITTER_CONTROL_ENABLE;
+ cntl.engine_id = ENGINE_ID_UNKNOWN;
+ cntl.transmitter = enc10->base.transmitter;
+ cntl.pll_id = clock_source;
+ cntl.signal = SIGNAL_TYPE_DISPLAY_PORT_MST;
+ cntl.lanes_number = link_settings->lane_count;
+ cntl.hpd_sel = enc10->base.hpd_source;
+ cntl.pixel_clock = link_settings->link_rate
+ * LINK_RATE_REF_FREQ_IN_KHZ;
+ /* TODO: check if undefined works */
+ cntl.color_depth = COLOR_DEPTH_UNDEFINED;
+
+ result = link_transmitter_control(enc10, &cntl);
+
+ if (result != BP_RESULT_OK) {
+ DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n",
+ __func__);
+ BREAK_TO_DEBUGGER();
+ }
+}
+/*
+ * @brief
+ * Disable transmitter and its encoder
+ */
+void dcn10_link_encoder_disable_output(
+ struct link_encoder *enc,
+ enum signal_type signal)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ struct bp_transmitter_control cntl = { 0 };
+ enum bp_result result;
+
+ if (!dcn10_is_dig_enabled(enc)) {
+ /* OF_SKIP_POWER_DOWN_INACTIVE_ENCODER */
+ return;
+ }
+ /* Power-down RX and disable GPU PHY should be paired.
+ * Disabling PHY without powering down RX may cause
+ * symbol lock loss, on which we will get DP Sink interrupt.
+ */
+
+ /* There is a case for the DP active dongles
+ * where we want to disable the PHY but keep RX powered,
+ * for those we need to ignore DP Sink interrupt
+ * by checking lane count that has been set
+ * on the last do_enable_output().
+ */
+
+ /* disable transmitter */
+ cntl.action = TRANSMITTER_CONTROL_DISABLE;
+ cntl.transmitter = enc10->base.transmitter;
+ cntl.hpd_sel = enc10->base.hpd_source;
+ cntl.signal = signal;
+ cntl.connector_obj_id = enc10->base.connector;
+
+ result = link_transmitter_control(enc10, &cntl);
+
+ if (result != BP_RESULT_OK) {
+ DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n",
+ __func__);
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ /* disable encoder */
+ if (dc_is_dp_signal(signal))
+ link_encoder_disable(enc10);
+}
+
+void dcn10_link_encoder_dp_set_lane_settings(
+ struct link_encoder *enc,
+ const struct link_training_settings *link_settings)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ union dpcd_training_lane_set training_lane_set = { { 0 } };
+ int32_t lane = 0;
+ struct bp_transmitter_control cntl = { 0 };
+
+ if (!link_settings) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ cntl.action = TRANSMITTER_CONTROL_SET_VOLTAGE_AND_PREEMPASIS;
+ cntl.transmitter = enc10->base.transmitter;
+ cntl.connector_obj_id = enc10->base.connector;
+ cntl.lanes_number = link_settings->link_settings.lane_count;
+ cntl.hpd_sel = enc10->base.hpd_source;
+ cntl.pixel_clock = link_settings->link_settings.link_rate *
+ LINK_RATE_REF_FREQ_IN_KHZ;
+
+ for (lane = 0; lane < link_settings->link_settings.lane_count; lane++) {
+ /* translate lane settings */
+
+ training_lane_set.bits.VOLTAGE_SWING_SET =
+ link_settings->lane_settings[lane].VOLTAGE_SWING;
+ training_lane_set.bits.PRE_EMPHASIS_SET =
+ link_settings->lane_settings[lane].PRE_EMPHASIS;
+
+ /* post cursor 2 setting only applies to HBR2 link rate */
+ if (link_settings->link_settings.link_rate == LINK_RATE_HIGH2) {
+ /* this is passed to VBIOS
+ * to program post cursor 2 level
+ */
+ training_lane_set.bits.POST_CURSOR2_SET =
+ link_settings->lane_settings[lane].POST_CURSOR2;
+ }
+
+ cntl.lane_select = lane;
+ cntl.lane_settings = training_lane_set.raw;
+
+ /* call VBIOS table to set voltage swing and pre-emphasis */
+ link_transmitter_control(enc10, &cntl);
+ }
+}
+
+/* set DP PHY test and training patterns */
+void dcn10_link_encoder_dp_set_phy_pattern(
+ struct link_encoder *enc,
+ const struct encoder_set_dp_phy_pattern_param *param)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+ switch (param->dp_phy_pattern) {
+ case DP_TEST_PATTERN_TRAINING_PATTERN1:
+ dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 0);
+ break;
+ case DP_TEST_PATTERN_TRAINING_PATTERN2:
+ dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 1);
+ break;
+ case DP_TEST_PATTERN_TRAINING_PATTERN3:
+ dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 2);
+ break;
+ case DP_TEST_PATTERN_TRAINING_PATTERN4:
+ dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 3);
+ break;
+ case DP_TEST_PATTERN_D102:
+ set_dp_phy_pattern_d102(enc10);
+ break;
+ case DP_TEST_PATTERN_SYMBOL_ERROR:
+ set_dp_phy_pattern_symbol_error(enc10);
+ break;
+ case DP_TEST_PATTERN_PRBS7:
+ set_dp_phy_pattern_prbs7(enc10);
+ break;
+ case DP_TEST_PATTERN_80BIT_CUSTOM:
+ set_dp_phy_pattern_80bit_custom(
+ enc10, param->custom_pattern);
+ break;
+ case DP_TEST_PATTERN_CP2520_1:
+ set_dp_phy_pattern_hbr2_compliance_cp2520_2(enc10, 1);
+ break;
+ case DP_TEST_PATTERN_CP2520_2:
+ set_dp_phy_pattern_hbr2_compliance_cp2520_2(enc10, 2);
+ break;
+ case DP_TEST_PATTERN_CP2520_3:
+ set_dp_phy_pattern_hbr2_compliance_cp2520_2(enc10, 3);
+ break;
+ case DP_TEST_PATTERN_VIDEO_MODE: {
+ set_dp_phy_pattern_passthrough_mode(
+ enc10, param->dp_panel_mode);
+ break;
+ }
+
+ default:
+ /* invalid phy pattern */
+ ASSERT_CRITICAL(false);
+ break;
+ }
+}
+
+static void fill_stream_allocation_row_info(
+ const struct link_mst_stream_allocation *stream_allocation,
+ uint32_t *src,
+ uint32_t *slots)
+{
+ const struct stream_encoder *stream_enc = stream_allocation->stream_enc;
+
+ if (stream_enc) {
+ *src = stream_enc->id;
+ *slots = stream_allocation->slot_count;
+ } else {
+ *src = 0;
+ *slots = 0;
+ }
+}
+
+/* programs DP MST VC payload allocation */
+void dcn10_link_encoder_update_mst_stream_allocation_table(
+ struct link_encoder *enc,
+ const struct link_mst_stream_allocation_table *table)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ uint32_t value0 = 0;
+ uint32_t value1 = 0;
+ uint32_t value2 = 0;
+ uint32_t slots = 0;
+ uint32_t src = 0;
+ uint32_t retries = 0;
+
+ /* For CZ, there are only 3 pipes. So Virtual channel is up 3.*/
+
+ /* --- Set MSE Stream Attribute -
+ * Setup VC Payload Table on Tx Side,
+ * Issue allocation change trigger
+ * to commit payload on both tx and rx side
+ */
+
+ /* we should clean-up table each time */
+
+ if (table->stream_count >= 1) {
+ fill_stream_allocation_row_info(
+ &table->stream_allocations[0],
+ &src,
+ &slots);
+ } else {
+ src = 0;
+ slots = 0;
+ }
+
+ REG_UPDATE_2(DP_MSE_SAT0,
+ DP_MSE_SAT_SRC0, src,
+ DP_MSE_SAT_SLOT_COUNT0, slots);
+
+ if (table->stream_count >= 2) {
+ fill_stream_allocation_row_info(
+ &table->stream_allocations[1],
+ &src,
+ &slots);
+ } else {
+ src = 0;
+ slots = 0;
+ }
+
+ REG_UPDATE_2(DP_MSE_SAT0,
+ DP_MSE_SAT_SRC1, src,
+ DP_MSE_SAT_SLOT_COUNT1, slots);
+
+ if (table->stream_count >= 3) {
+ fill_stream_allocation_row_info(
+ &table->stream_allocations[2],
+ &src,
+ &slots);
+ } else {
+ src = 0;
+ slots = 0;
+ }
+
+ REG_UPDATE_2(DP_MSE_SAT1,
+ DP_MSE_SAT_SRC2, src,
+ DP_MSE_SAT_SLOT_COUNT2, slots);
+
+ if (table->stream_count >= 4) {
+ fill_stream_allocation_row_info(
+ &table->stream_allocations[3],
+ &src,
+ &slots);
+ } else {
+ src = 0;
+ slots = 0;
+ }
+
+ REG_UPDATE_2(DP_MSE_SAT1,
+ DP_MSE_SAT_SRC3, src,
+ DP_MSE_SAT_SLOT_COUNT3, slots);
+
+ /* --- wait for transaction finish */
+
+ /* send allocation change trigger (ACT) ?
+ * this step first sends the ACT,
+ * then double buffers the SAT into the hardware
+ * making the new allocation active on the DP MST mode link
+ */
+
+ /* DP_MSE_SAT_UPDATE:
+ * 0 - No Action
+ * 1 - Update SAT with trigger
+ * 2 - Update SAT without trigger
+ */
+ REG_UPDATE(DP_MSE_SAT_UPDATE,
+ DP_MSE_SAT_UPDATE, 1);
+
+ /* wait for update to complete
+ * (i.e. DP_MSE_SAT_UPDATE field is reset to 0)
+ * then wait for the transmission
+ * of at least 16 MTP headers on immediate local link.
+ * i.e. DP_MSE_16_MTP_KEEPOUT field (read only) is reset to 0
+ * a value of 1 indicates that DP MST mode
+ * is in the 16 MTP keepout region after a VC has been added.
+ * MST stream bandwidth (VC rate) can be configured
+ * after this bit is cleared
+ */
+ do {
+ udelay(10);
+
+ value0 = REG_READ(DP_MSE_SAT_UPDATE);
+
+ REG_GET(DP_MSE_SAT_UPDATE,
+ DP_MSE_SAT_UPDATE, &value1);
+
+ REG_GET(DP_MSE_SAT_UPDATE,
+ DP_MSE_16_MTP_KEEPOUT, &value2);
+
+ /* bit field DP_MSE_SAT_UPDATE is set to 1 already */
+ if (!value1 && !value2)
+ break;
+ ++retries;
+ } while (retries < DP_MST_UPDATE_MAX_RETRY);
+}
+
+void dcn10_link_encoder_connect_dig_be_to_fe(
+ struct link_encoder *enc,
+ enum engine_id engine,
+ bool connect)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ uint32_t field;
+
+ if (engine != ENGINE_ID_UNKNOWN) {
+
+ REG_GET(DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, &field);
+
+ if (connect)
+ field |= get_frontend_source(engine);
+ else
+ field &= ~get_frontend_source(engine);
+
+ REG_UPDATE(DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, field);
+ }
+}
+
+
+#define HPD_REG(reg)\
+ (enc10->hpd_regs->reg)
+
+#define HPD_REG_READ(reg_name) \
+ dm_read_reg(CTX, HPD_REG(reg_name))
+
+#define HPD_REG_UPDATE_N(reg_name, n, ...) \
+ generic_reg_update_ex(CTX, \
+ HPD_REG(reg_name), \
+ HPD_REG_READ(reg_name), \
+ n, __VA_ARGS__)
+
+#define HPD_REG_UPDATE(reg_name, field, val) \
+ HPD_REG_UPDATE_N(reg_name, 1, \
+ FN(reg_name, field), val)
+
+void dcn10_link_encoder_enable_hpd(struct link_encoder *enc)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+ HPD_REG_UPDATE(DC_HPD_CONTROL,
+ DC_HPD_EN, 1);
+}
+
+void dcn10_link_encoder_disable_hpd(struct link_encoder *enc)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+ HPD_REG_UPDATE(DC_HPD_CONTROL,
+ DC_HPD_EN, 0);
+}
+
+
+#define AUX_REG(reg)\
+ (enc10->aux_regs->reg)
+
+#define AUX_REG_READ(reg_name) \
+ dm_read_reg(CTX, AUX_REG(reg_name))
+
+#define AUX_REG_UPDATE_N(reg_name, n, ...) \
+ generic_reg_update_ex(CTX, \
+ AUX_REG(reg_name), \
+ AUX_REG_READ(reg_name), \
+ n, __VA_ARGS__)
+
+#define AUX_REG_UPDATE(reg_name, field, val) \
+ AUX_REG_UPDATE_N(reg_name, 1, \
+ FN(reg_name, field), val)
+
+#define AUX_REG_UPDATE_2(reg, f1, v1, f2, v2) \
+ AUX_REG_UPDATE_N(reg, 2,\
+ FN(reg, f1), v1,\
+ FN(reg, f2), v2)
+
+static void aux_initialize(
+ struct dcn10_link_encoder *enc10)
+{
+ enum hpd_source_id hpd_source = enc10->base.hpd_source;
+
+ AUX_REG_UPDATE_2(AUX_CONTROL,
+ AUX_HPD_SEL, hpd_source,
+ AUX_LS_READ_EN, 0);
+
+ /* 1/4 window (the maximum allowed) */
+ AUX_REG_UPDATE(AUX_DPHY_RX_CONTROL0,
+ AUX_RX_RECEIVE_WINDOW, 1);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
new file mode 100644
index 000000000000..2a97cdb2cfbb
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_LINK_ENCODER__DCN10_H__
+#define __DC_LINK_ENCODER__DCN10_H__
+
+#include "link_encoder.h"
+
+#define TO_DCN10_LINK_ENC(link_encoder)\
+ container_of(link_encoder, struct dcn10_link_encoder, base)
+
+
+#define AUX_REG_LIST(id)\
+ SRI(AUX_CONTROL, DP_AUX, id), \
+ SRI(AUX_DPHY_RX_CONTROL0, DP_AUX, id)
+
+#define HPD_REG_LIST(id)\
+ SRI(DC_HPD_CONTROL, HPD, id)
+
+#define LE_DCN_COMMON_REG_LIST(id) \
+ SRI(DIG_BE_CNTL, DIG, id), \
+ SRI(DIG_BE_EN_CNTL, DIG, id), \
+ SRI(DP_CONFIG, DP, id), \
+ SRI(DP_DPHY_CNTL, DP, id), \
+ SRI(DP_DPHY_PRBS_CNTL, DP, id), \
+ SRI(DP_DPHY_SCRAM_CNTL, DP, id),\
+ SRI(DP_DPHY_SYM0, DP, id), \
+ SRI(DP_DPHY_SYM1, DP, id), \
+ SRI(DP_DPHY_SYM2, DP, id), \
+ SRI(DP_DPHY_TRAINING_PATTERN_SEL, DP, id), \
+ SRI(DP_LINK_CNTL, DP, id), \
+ SRI(DP_LINK_FRAMING_CNTL, DP, id), \
+ SRI(DP_MSE_SAT0, DP, id), \
+ SRI(DP_MSE_SAT1, DP, id), \
+ SRI(DP_MSE_SAT2, DP, id), \
+ SRI(DP_MSE_SAT_UPDATE, DP, id), \
+ SRI(DP_SEC_CNTL, DP, id), \
+ SRI(DP_VID_STREAM_CNTL, DP, id), \
+ SRI(DP_DPHY_FAST_TRAINING, DP, id), \
+ SRI(DP_SEC_CNTL1, DP, id), \
+ SRI(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \
+ SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
+ SRI(DP_DPHY_HBR2_PATTERN_CONTROL, DP, id)
+
+#define LE_DCN10_REG_LIST(id)\
+ LE_DCN_COMMON_REG_LIST(id)
+
+struct dcn10_link_enc_aux_registers {
+ uint32_t AUX_CONTROL;
+ uint32_t AUX_DPHY_RX_CONTROL0;
+};
+
+struct dcn10_link_enc_hpd_registers {
+ uint32_t DC_HPD_CONTROL;
+};
+
+struct dcn10_link_enc_registers {
+ uint32_t DIG_BE_CNTL;
+ uint32_t DIG_BE_EN_CNTL;
+ uint32_t DP_CONFIG;
+ uint32_t DP_DPHY_CNTL;
+ uint32_t DP_DPHY_INTERNAL_CTRL;
+ uint32_t DP_DPHY_PRBS_CNTL;
+ uint32_t DP_DPHY_SCRAM_CNTL;
+ uint32_t DP_DPHY_SYM0;
+ uint32_t DP_DPHY_SYM1;
+ uint32_t DP_DPHY_SYM2;
+ uint32_t DP_DPHY_TRAINING_PATTERN_SEL;
+ uint32_t DP_LINK_CNTL;
+ uint32_t DP_LINK_FRAMING_CNTL;
+ uint32_t DP_MSE_SAT0;
+ uint32_t DP_MSE_SAT1;
+ uint32_t DP_MSE_SAT2;
+ uint32_t DP_MSE_SAT_UPDATE;
+ uint32_t DP_SEC_CNTL;
+ uint32_t DP_VID_STREAM_CNTL;
+ uint32_t DP_DPHY_FAST_TRAINING;
+ uint32_t DP_DPHY_BS_SR_SWAP_CNTL;
+ uint32_t DP_DPHY_HBR2_PATTERN_CONTROL;
+ uint32_t DP_SEC_CNTL1;
+};
+
+#define LE_SF(reg_name, field_name, post_fix)\
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+#define LINK_ENCODER_MASK_SH_LIST_DCN10(mask_sh)\
+ LE_SF(DIG0_DIG_BE_EN_CNTL, DIG_ENABLE, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_HPD_SELECT, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_MODE, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_BYPASS, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE0, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE1, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE2, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE3, mask_sh),\
+ LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_EN, mask_sh),\
+ LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_SEL, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM1, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM2, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM3, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM4, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM5, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM6, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM7, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM8, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_ADVANCE, mask_sh),\
+ LE_SF(DP0_DP_DPHY_FAST_TRAINING, DPHY_RX_FAST_TRAINING_CAPABLE, mask_sh),\
+ LE_SF(DP0_DP_DPHY_BS_SR_SWAP_CNTL, DPHY_LOAD_BS_COUNT, mask_sh),\
+ LE_SF(DP0_DP_DPHY_TRAINING_PATTERN_SEL, DPHY_TRAINING_PATTERN_SEL, mask_sh),\
+ LE_SF(DP0_DP_DPHY_HBR2_PATTERN_CONTROL, DP_DPHY_HBR2_PATTERN_CONTROL, mask_sh),\
+ LE_SF(DP0_DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_IDLE_BS_INTERVAL, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VBID_DISABLE, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VID_ENHANCED_FRAME_MODE, mask_sh),\
+ LE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+ LE_SF(DP0_DP_CONFIG, DP_UDI_LANES, mask_sh),\
+ LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_LINE_NUM, mask_sh),\
+ LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_PRIORITY, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC0, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC1, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT0, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT1, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC2, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC3, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT2, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT3, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_SAT_UPDATE, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_16_MTP_KEEPOUT, mask_sh),\
+ LE_SF(DP_AUX0_AUX_CONTROL, AUX_HPD_SEL, mask_sh),\
+ LE_SF(DP_AUX0_AUX_CONTROL, AUX_LS_READ_EN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_RECEIVE_WINDOW, mask_sh),\
+ LE_SF(HPD0_DC_HPD_CONTROL, DC_HPD_EN, mask_sh)
+
+#define DCN_LINK_ENCODER_REG_FIELD_LIST(type) \
+ type DIG_ENABLE;\
+ type DIG_HPD_SELECT;\
+ type DIG_MODE;\
+ type DIG_FE_SOURCE_SELECT;\
+ type DPHY_BYPASS;\
+ type DPHY_ATEST_SEL_LANE0;\
+ type DPHY_ATEST_SEL_LANE1;\
+ type DPHY_ATEST_SEL_LANE2;\
+ type DPHY_ATEST_SEL_LANE3;\
+ type DPHY_PRBS_EN;\
+ type DPHY_PRBS_SEL;\
+ type DPHY_SYM1;\
+ type DPHY_SYM2;\
+ type DPHY_SYM3;\
+ type DPHY_SYM4;\
+ type DPHY_SYM5;\
+ type DPHY_SYM6;\
+ type DPHY_SYM7;\
+ type DPHY_SYM8;\
+ type DPHY_SCRAMBLER_BS_COUNT;\
+ type DPHY_SCRAMBLER_ADVANCE;\
+ type DPHY_RX_FAST_TRAINING_CAPABLE;\
+ type DPHY_LOAD_BS_COUNT;\
+ type DPHY_TRAINING_PATTERN_SEL;\
+ type DP_DPHY_HBR2_PATTERN_CONTROL;\
+ type DP_LINK_TRAINING_COMPLETE;\
+ type DP_IDLE_BS_INTERVAL;\
+ type DP_VBID_DISABLE;\
+ type DP_VID_ENHANCED_FRAME_MODE;\
+ type DP_VID_STREAM_ENABLE;\
+ type DP_UDI_LANES;\
+ type DP_SEC_GSP0_LINE_NUM;\
+ type DP_SEC_GSP0_PRIORITY;\
+ type DP_MSE_SAT_SRC0;\
+ type DP_MSE_SAT_SRC1;\
+ type DP_MSE_SAT_SRC2;\
+ type DP_MSE_SAT_SRC3;\
+ type DP_MSE_SAT_SLOT_COUNT0;\
+ type DP_MSE_SAT_SLOT_COUNT1;\
+ type DP_MSE_SAT_SLOT_COUNT2;\
+ type DP_MSE_SAT_SLOT_COUNT3;\
+ type DP_MSE_SAT_UPDATE;\
+ type DP_MSE_16_MTP_KEEPOUT;\
+ type AUX_HPD_SEL;\
+ type AUX_LS_READ_EN;\
+ type AUX_RX_RECEIVE_WINDOW;\
+ type DC_HPD_EN
+
+struct dcn10_link_enc_shift {
+ DCN_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
+};
+
+struct dcn10_link_enc_mask {
+ DCN_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
+};
+
+struct dcn10_link_encoder {
+ struct link_encoder base;
+ const struct dcn10_link_enc_registers *link_regs;
+ const struct dcn10_link_enc_aux_registers *aux_regs;
+ const struct dcn10_link_enc_hpd_registers *hpd_regs;
+ const struct dcn10_link_enc_shift *link_shift;
+ const struct dcn10_link_enc_mask *link_mask;
+};
+
+
+void dcn10_link_encoder_construct(
+ struct dcn10_link_encoder *enc10,
+ const struct encoder_init_data *init_data,
+ const struct encoder_feature_support *enc_features,
+ const struct dcn10_link_enc_registers *link_regs,
+ const struct dcn10_link_enc_aux_registers *aux_regs,
+ const struct dcn10_link_enc_hpd_registers *hpd_regs,
+ const struct dcn10_link_enc_shift *link_shift,
+ const struct dcn10_link_enc_mask *link_mask);
+
+bool dcn10_link_encoder_validate_dvi_output(
+ const struct dcn10_link_encoder *enc10,
+ enum signal_type connector_signal,
+ enum signal_type signal,
+ const struct dc_crtc_timing *crtc_timing);
+
+bool dcn10_link_encoder_validate_rgb_output(
+ const struct dcn10_link_encoder *enc10,
+ const struct dc_crtc_timing *crtc_timing);
+
+bool dcn10_link_encoder_validate_dp_output(
+ const struct dcn10_link_encoder *enc10,
+ const struct dc_crtc_timing *crtc_timing);
+
+bool dcn10_link_encoder_validate_wireless_output(
+ const struct dcn10_link_encoder *enc10,
+ const struct dc_crtc_timing *crtc_timing);
+
+bool dcn10_link_encoder_validate_output_with_stream(
+ struct link_encoder *enc,
+ const struct dc_stream_state *stream);
+
+/****************** HW programming ************************/
+
+/* initialize HW */ /* why do we initialze aux in here? */
+void dcn10_link_encoder_hw_init(struct link_encoder *enc);
+
+void dcn10_link_encoder_destroy(struct link_encoder **enc);
+
+/* program DIG_MODE in DIG_BE */
+/* TODO can this be combined with enable_output? */
+void dcn10_link_encoder_setup(
+ struct link_encoder *enc,
+ enum signal_type signal);
+
+/* enables TMDS PHY output */
+/* TODO: still need depth or just pass in adjusted pixel clock? */
+void dcn10_link_encoder_enable_tmds_output(
+ struct link_encoder *enc,
+ enum clock_source_id clock_source,
+ enum dc_color_depth color_depth,
+ enum signal_type signal,
+ uint32_t pixel_clock);
+
+/* enables DP PHY output */
+void dcn10_link_encoder_enable_dp_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ enum clock_source_id clock_source);
+
+/* enables DP PHY output in MST mode */
+void dcn10_link_encoder_enable_dp_mst_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ enum clock_source_id clock_source);
+
+/* disable PHY output */
+void dcn10_link_encoder_disable_output(
+ struct link_encoder *enc,
+ enum signal_type signal);
+
+/* set DP lane settings */
+void dcn10_link_encoder_dp_set_lane_settings(
+ struct link_encoder *enc,
+ const struct link_training_settings *link_settings);
+
+void dcn10_link_encoder_dp_set_phy_pattern(
+ struct link_encoder *enc,
+ const struct encoder_set_dp_phy_pattern_param *param);
+
+/* programs DP MST VC payload allocation */
+void dcn10_link_encoder_update_mst_stream_allocation_table(
+ struct link_encoder *enc,
+ const struct link_mst_stream_allocation_table *table);
+
+void dcn10_link_encoder_connect_dig_be_to_fe(
+ struct link_encoder *enc,
+ enum engine_id engine,
+ bool connect);
+
+void dcn10_link_encoder_set_dp_phy_pattern_training_pattern(
+ struct link_encoder *enc,
+ uint32_t index);
+
+void dcn10_link_encoder_enable_hpd(struct link_encoder *enc);
+
+void dcn10_link_encoder_disable_hpd(struct link_encoder *enc);
+
+void dcn10_psr_program_dp_dphy_fast_training(struct link_encoder *enc,
+ bool exit_link_training_required);
+
+void dcn10_psr_program_secondary_packet(struct link_encoder *enc,
+ unsigned int sdp_transmit_line_num_deadline);
+
+bool dcn10_is_dig_enabled(struct link_encoder *enc);
+
+#endif /* __DC_LINK_ENCODER__DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
index 179890b1a8c4..9ca51ae46de7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
@@ -65,6 +65,7 @@ static void mpc1_update_blending(
int mpcc_id)
{
struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
+ struct mpcc *mpcc = mpc1_get_mpcc(mpc, mpcc_id);
REG_UPDATE_5(MPCC_CONTROL[mpcc_id],
MPCC_ALPHA_BLND_MODE, blnd_cfg->alpha_mode,
@@ -74,6 +75,7 @@ static void mpc1_update_blending(
MPCC_GLOBAL_GAIN, blnd_cfg->global_gain);
mpc1_set_bg_color(mpc, &blnd_cfg->black_color, mpcc_id);
+ mpcc->blnd_cfg = *blnd_cfg;
}
void mpc1_update_stereo_mix(
@@ -235,8 +237,7 @@ struct mpcc *mpc1_insert_plane(
}
/* update the blending configuration */
- new_mpcc->blnd_cfg = *blnd_cfg;
- mpc->funcs->update_blending(mpc, &new_mpcc->blnd_cfg, mpcc_id);
+ mpc->funcs->update_blending(mpc, blnd_cfg, mpcc_id);
/* update the stereo mix settings, if provided */
if (sm_cfg != NULL) {
@@ -409,7 +410,26 @@ void mpc1_init_mpcc_list_from_hw(
}
}
+void mpc1_read_mpcc_state(
+ struct mpc *mpc,
+ int mpcc_inst,
+ struct mpcc_state *s)
+{
+ struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
+
+ REG_GET(MPCC_OPP_ID[mpcc_inst], MPCC_OPP_ID, &s->opp_id);
+ REG_GET(MPCC_TOP_SEL[mpcc_inst], MPCC_TOP_SEL, &s->dpp_id);
+ REG_GET(MPCC_BOT_SEL[mpcc_inst], MPCC_BOT_SEL, &s->bot_mpcc_id);
+ REG_GET_4(MPCC_CONTROL[mpcc_inst], MPCC_MODE, &s->mode,
+ MPCC_ALPHA_BLND_MODE, &s->alpha_mode,
+ MPCC_ALPHA_MULTIPLIED_MODE, &s->pre_multiplied_alpha,
+ MPCC_BLND_ACTIVE_OVERLAP_ONLY, &s->overlap_only);
+ REG_GET_2(MPCC_STATUS[mpcc_inst], MPCC_IDLE, &s->idle,
+ MPCC_BUSY, &s->busy);
+}
+
const struct mpc_funcs dcn10_mpc_funcs = {
+ .read_mpcc_state = mpc1_read_mpcc_state,
.insert_plane = mpc1_insert_plane,
.remove_mpcc = mpc1_remove_mpcc,
.mpc_init = mpc1_mpc_init,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
index 267a2995ef6e..d3d16c4cbea3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
@@ -183,4 +183,9 @@ struct mpcc *mpc1_get_mpcc_for_dpp(
struct mpc_tree *tree,
int dpp_id);
+void mpc1_read_mpcc_state(
+ struct mpc *mpc,
+ int mpcc_inst,
+ struct mpcc_state *s);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index 4bf64d1b2c60..f2fbce0e3fc5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -93,6 +93,81 @@ static void optc1_disable_stereo(struct timing_generator *optc)
OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0);
}
+static uint32_t get_start_vline(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing)
+{
+ struct dc_crtc_timing patched_crtc_timing;
+ int vesa_sync_start;
+ int asic_blank_end;
+ int interlace_factor;
+ int vertical_line_start;
+
+ patched_crtc_timing = *dc_crtc_timing;
+ optc1_apply_front_porch_workaround(optc, &patched_crtc_timing);
+
+ vesa_sync_start = patched_crtc_timing.h_addressable +
+ patched_crtc_timing.h_border_right +
+ patched_crtc_timing.h_front_porch;
+
+ asic_blank_end = patched_crtc_timing.h_total -
+ vesa_sync_start -
+ patched_crtc_timing.h_border_left;
+
+ interlace_factor = patched_crtc_timing.flags.INTERLACE ? 2 : 1;
+
+ vesa_sync_start = patched_crtc_timing.v_addressable +
+ patched_crtc_timing.v_border_bottom +
+ patched_crtc_timing.v_front_porch;
+
+ asic_blank_end = (patched_crtc_timing.v_total -
+ vesa_sync_start -
+ patched_crtc_timing.v_border_top)
+ * interlace_factor;
+
+ vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1;
+ if (vertical_line_start < 0) {
+ ASSERT(0);
+ vertical_line_start = 0;
+ }
+
+ return vertical_line_start;
+}
+
+void optc1_program_vline_interrupt(
+ struct timing_generator *optc,
+ const struct dc_crtc_timing *dc_crtc_timing,
+ unsigned long long vsync_delta)
+{
+
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ unsigned long long req_delta_tens_of_usec = div64_u64((vsync_delta + 9999), 10000);
+ unsigned long long pix_clk_hundreds_khz = div64_u64((dc_crtc_timing->pix_clk_khz + 99), 100);
+ uint32_t req_delta_lines = (uint32_t) div64_u64(
+ (req_delta_tens_of_usec * pix_clk_hundreds_khz + dc_crtc_timing->h_total - 1),
+ dc_crtc_timing->h_total);
+
+ uint32_t vsync_line = get_start_vline(optc, dc_crtc_timing);
+ uint32_t start_line = 0;
+ uint32_t endLine = 0;
+
+ if (req_delta_lines != 0)
+ req_delta_lines--;
+
+ if (req_delta_lines > vsync_line)
+ start_line = dc_crtc_timing->v_total - (req_delta_lines - vsync_line) - 1;
+ else
+ start_line = vsync_line - req_delta_lines;
+
+ endLine = start_line + 2;
+
+ if (endLine >= dc_crtc_timing->v_total)
+ endLine = 2;
+
+ REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0,
+ OTG_VERTICAL_INTERRUPT0_LINE_START, start_line,
+ OTG_VERTICAL_INTERRUPT0_LINE_END, endLine);
+}
+
/**
* program_timing_generator used by mode timing set
* Program CRTC Timing Registers - OTG_H_*, OTG_V_*, Pixel repetition.
@@ -285,7 +360,7 @@ void optc1_program_timing(
}
-static void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable)
+void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -780,17 +855,17 @@ void optc1_set_drr(
OTG_SET_V_TOTAL_MIN_MASK_EN, 0,
OTG_SET_V_TOTAL_MIN_MASK, 0);
} else {
- REG_SET(OTG_V_TOTAL_MIN, 0,
- OTG_V_TOTAL_MIN, 0);
-
- REG_SET(OTG_V_TOTAL_MAX, 0,
- OTG_V_TOTAL_MAX, 0);
-
REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
OTG_SET_V_TOTAL_MIN_MASK, 0,
OTG_V_TOTAL_MIN_SEL, 0,
OTG_V_TOTAL_MAX_SEL, 0,
OTG_FORCE_LOCK_ON_EVENT, 0);
+
+ REG_SET(OTG_V_TOTAL_MIN, 0,
+ OTG_V_TOTAL_MIN, 0);
+
+ REG_SET(OTG_V_TOTAL_MAX, 0,
+ OTG_V_TOTAL_MAX, 0);
}
}
@@ -1154,6 +1229,12 @@ void optc1_read_otg_state(struct optc *optc1,
REG_GET(OTG_V_TOTAL_MIN,
OTG_V_TOTAL_MIN, &s->v_total_min);
+ REG_GET(OTG_V_TOTAL_CONTROL,
+ OTG_V_TOTAL_MAX_SEL, &s->v_total_max_sel);
+
+ REG_GET(OTG_V_TOTAL_CONTROL,
+ OTG_V_TOTAL_MIN_SEL, &s->v_total_min_sel);
+
REG_GET_2(OTG_V_SYNC_A,
OTG_V_SYNC_A_START, &s->v_sync_a_start,
OTG_V_SYNC_A_END, &s->v_sync_a_end);
@@ -1176,20 +1257,20 @@ void optc1_read_otg_state(struct optc *optc1,
OPTC_UNDERFLOW_OCCURRED_STATUS, &s->underflow_occurred_status);
}
-static void optc1_clear_optc_underflow(struct timing_generator *optc)
+void optc1_clear_optc_underflow(struct timing_generator *optc)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
REG_UPDATE(OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, 1);
}
-static void optc1_tg_init(struct timing_generator *optc)
+void optc1_tg_init(struct timing_generator *optc)
{
optc1_set_blank_data_double_buffer(optc, true);
optc1_clear_optc_underflow(optc);
}
-static bool optc1_is_tg_enabled(struct timing_generator *optc)
+bool optc1_is_tg_enabled(struct timing_generator *optc)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
uint32_t otg_enabled = 0;
@@ -1200,7 +1281,7 @@ static bool optc1_is_tg_enabled(struct timing_generator *optc)
}
-static bool optc1_is_optc_underflow_occurred(struct timing_generator *optc)
+bool optc1_is_optc_underflow_occurred(struct timing_generator *optc)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
uint32_t underflow_occurred = 0;
@@ -1215,6 +1296,7 @@ static bool optc1_is_optc_underflow_occurred(struct timing_generator *optc)
static const struct timing_generator_funcs dcn10_tg_funcs = {
.validate_timing = optc1_validate_timing,
.program_timing = optc1_program_timing,
+ .program_vline_interrupt = optc1_program_vline_interrupt,
.program_global_sync = optc1_program_global_sync,
.enable_crtc = optc1_enable_crtc,
.disable_crtc = optc1_disable_crtc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
index d25e7bf0d0d7..c62052f46460 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
@@ -65,6 +65,8 @@
SRI(OTG_NOM_VERT_POSITION, OTG, inst),\
SRI(OTG_BLACK_COLOR, OTG, inst),\
SRI(OTG_CLOCK_CONTROL, OTG, inst),\
+ SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\
+ SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\
SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\
SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\
SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\
@@ -124,6 +126,8 @@ struct dcn_optc_registers {
uint32_t OTG_TEST_PATTERN_CONTROL;
uint32_t OTG_TEST_PATTERN_COLOR;
uint32_t OTG_CLOCK_CONTROL;
+ uint32_t OTG_VERTICAL_INTERRUPT0_CONTROL;
+ uint32_t OTG_VERTICAL_INTERRUPT0_POSITION;
uint32_t OTG_VERTICAL_INTERRUPT2_CONTROL;
uint32_t OTG_VERTICAL_INTERRUPT2_POSITION;
uint32_t OPTC_INPUT_CLOCK_CONTROL;
@@ -206,6 +210,9 @@ struct dcn_optc_registers {
SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\
SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\
SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\
+ SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\
+ SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\
+ SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\
SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\
SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\
SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\
@@ -323,6 +330,9 @@ struct dcn_optc_registers {
type OTG_CLOCK_EN;\
type OTG_CLOCK_ON;\
type OTG_CLOCK_GATE_DIS;\
+ type OTG_VERTICAL_INTERRUPT0_INT_ENABLE;\
+ type OTG_VERTICAL_INTERRUPT0_LINE_START;\
+ type OTG_VERTICAL_INTERRUPT0_LINE_END;\
type OTG_VERTICAL_INTERRUPT2_INT_ENABLE;\
type OTG_VERTICAL_INTERRUPT2_LINE_START;\
type OPTC_INPUT_CLK_EN;\
@@ -396,6 +406,8 @@ struct dcn_otg_state {
uint32_t v_total;
uint32_t v_total_max;
uint32_t v_total_min;
+ uint32_t v_total_min_sel;
+ uint32_t v_total_max_sel;
uint32_t v_sync_a_start;
uint32_t v_sync_a_end;
uint32_t h_blank_start;
@@ -420,6 +432,10 @@ void optc1_program_timing(
const struct dc_crtc_timing *dc_crtc_timing,
bool use_vbios);
+void optc1_program_vline_interrupt(struct timing_generator *optc,
+ const struct dc_crtc_timing *dc_crtc_timing,
+ unsigned long long vsync_delta);
+
void optc1_program_global_sync(
struct timing_generator *optc);
@@ -481,4 +497,14 @@ void optc1_program_stereo(struct timing_generator *optc,
bool optc1_is_stereo_left_eye(struct timing_generator *optc);
+void optc1_clear_optc_underflow(struct timing_generator *optc);
+
+void optc1_tg_init(struct timing_generator *optc);
+
+bool optc1_is_tg_enabled(struct timing_generator *optc);
+
+bool optc1_is_optc_underflow_occurred(struct timing_generator *optc);
+
+void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable);
+
#endif /* __DC_TIMING_GENERATOR_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 02bd664aed3e..df5cb2d1d164 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -38,8 +38,8 @@
#include "dcn10/dcn10_hw_sequencer.h"
#include "dce110/dce110_hw_sequencer.h"
#include "dcn10/dcn10_opp.h"
-#include "dce/dce_link_encoder.h"
-#include "dce/dce_stream_encoder.h"
+#include "dcn10/dcn10_link_encoder.h"
+#include "dcn10/dcn10_stream_encoder.h"
#include "dce/dce_clocks.h"
#include "dce/dce_clock_source.h"
#include "dce/dce_audio.h"
@@ -166,36 +166,22 @@ static const struct dce_abm_mask abm_mask = {
#define stream_enc_regs(id)\
[id] = {\
- SE_DCN_REG_LIST(id),\
- .TMDS_CNTL = 0,\
- .AFMT_AVI_INFO0 = 0,\
- .AFMT_AVI_INFO1 = 0,\
- .AFMT_AVI_INFO2 = 0,\
- .AFMT_AVI_INFO3 = 0,\
+ SE_DCN_REG_LIST(id)\
}
-static const struct dce110_stream_enc_registers stream_enc_regs[] = {
+static const struct dcn10_stream_enc_registers stream_enc_regs[] = {
stream_enc_regs(0),
stream_enc_regs(1),
stream_enc_regs(2),
stream_enc_regs(3),
};
-static const struct dce_stream_encoder_shift se_shift = {
+static const struct dcn10_stream_encoder_shift se_shift = {
SE_COMMON_MASK_SH_LIST_DCN10(__SHIFT)
};
-static const struct dce_stream_encoder_mask se_mask = {
- SE_COMMON_MASK_SH_LIST_DCN10(_MASK),
- .AFMT_GENERIC0_UPDATE = 0,
- .AFMT_GENERIC2_UPDATE = 0,
- .DP_DYN_RANGE = 0,
- .DP_YCBCR_RANGE = 0,
- .HDMI_AVI_INFO_SEND = 0,
- .HDMI_AVI_INFO_CONT = 0,
- .HDMI_AVI_INFO_LINE = 0,
- .DP_SEC_AVI_ENABLE = 0,
- .AFMT_AVI_INFO_VERSION = 0
+static const struct dcn10_stream_encoder_mask se_mask = {
+ SE_COMMON_MASK_SH_LIST_DCN10(_MASK)
};
#define audio_regs(id)\
@@ -228,13 +214,11 @@ static const struct dce_aduio_mask audio_mask = {
AUX_REG_LIST(id)\
}
-static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = {
+static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = {
aux_regs(0),
aux_regs(1),
aux_regs(2),
- aux_regs(3),
- aux_regs(4),
- aux_regs(5)
+ aux_regs(3)
};
#define hpd_regs(id)\
@@ -242,13 +226,11 @@ static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = {
HPD_REG_LIST(id)\
}
-static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = {
+static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = {
hpd_regs(0),
hpd_regs(1),
hpd_regs(2),
- hpd_regs(3),
- hpd_regs(4),
- hpd_regs(5)
+ hpd_regs(3)
};
#define link_regs(id)\
@@ -257,14 +239,19 @@ static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = {
SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \
}
-static const struct dce110_link_enc_registers link_enc_regs[] = {
+static const struct dcn10_link_enc_registers link_enc_regs[] = {
link_regs(0),
link_regs(1),
link_regs(2),
- link_regs(3),
- link_regs(4),
- link_regs(5),
- link_regs(6),
+ link_regs(3)
+};
+
+static const struct dcn10_link_enc_shift le_shift = {
+ LINK_ENCODER_MASK_SH_LIST_DCN10(__SHIFT)
+};
+
+static const struct dcn10_link_enc_mask le_mask = {
+ LINK_ENCODER_MASK_SH_LIST_DCN10(_MASK)
};
#define ipp_regs(id)\
@@ -320,11 +307,14 @@ static const struct dcn_dpp_registers tf_regs[] = {
};
static const struct dcn_dpp_shift tf_shift = {
- TF_REG_LIST_SH_MASK_DCN10(__SHIFT)
+ TF_REG_LIST_SH_MASK_DCN10(__SHIFT),
+ TF_DEBUG_REG_LIST_SH_DCN10
+
};
static const struct dcn_dpp_mask tf_mask = {
TF_REG_LIST_SH_MASK_DCN10(_MASK),
+ TF_DEBUG_REG_LIST_MASK_DCN10
};
static const struct dcn_mpc_registers mpc_regs = {
@@ -457,6 +447,8 @@ static const struct dc_debug debug_defaults_drv = {
.vsr_support = true,
.performance_trace = false,
.az_endpoint_mute_only = true,
+ .recovery_enabled = false, /*enable this by default after testing.*/
+ .max_downscale_src_width = 3840,
};
static const struct dc_debug debug_defaults_diags = {
@@ -592,20 +584,22 @@ static const struct encoder_feature_support link_enc_feature = {
struct link_encoder *dcn10_link_encoder_create(
const struct encoder_init_data *enc_init_data)
{
- struct dce110_link_encoder *enc110 =
- kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
+ struct dcn10_link_encoder *enc10 =
+ kzalloc(sizeof(struct dcn10_link_encoder), GFP_KERNEL);
- if (!enc110)
+ if (!enc10)
return NULL;
- dce110_link_encoder_construct(enc110,
+ dcn10_link_encoder_construct(enc10,
enc_init_data,
&link_enc_feature,
&link_enc_regs[enc_init_data->transmitter],
&link_enc_aux_regs[enc_init_data->channel - 1],
- &link_enc_hpd_regs[enc_init_data->hpd_source]);
+ &link_enc_hpd_regs[enc_init_data->hpd_source],
+ &le_shift,
+ &le_mask);
- return &enc110->base;
+ return &enc10->base;
}
struct clock_source *dcn10_clock_source_create(
@@ -650,16 +644,16 @@ static struct stream_encoder *dcn10_stream_encoder_create(
enum engine_id eng_id,
struct dc_context *ctx)
{
- struct dce110_stream_encoder *enc110 =
- kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL);
+ struct dcn10_stream_encoder *enc1 =
+ kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
- if (!enc110)
+ if (!enc1)
return NULL;
- dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id,
+ dcn10_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id,
&stream_enc_regs[eng_id],
&se_shift, &se_mask);
- return &enc110->base;
+ return &enc1->base;
}
static const struct dce_hwseq_registers hwseq_reg = {
@@ -918,36 +912,6 @@ enum dc_status dcn10_add_stream_to_ctx(
return result;
}
-enum dc_status dcn10_validate_guaranteed(
- struct dc *dc,
- struct dc_stream_state *dc_stream,
- struct dc_state *context)
-{
- enum dc_status result = DC_ERROR_UNEXPECTED;
-
- context->streams[0] = dc_stream;
- dc_stream_retain(context->streams[0]);
- context->stream_count++;
-
- result = resource_map_pool_resources(dc, context, dc_stream);
-
- if (result == DC_OK)
- result = resource_map_phy_clock_resources(dc, context, dc_stream);
-
- if (result == DC_OK)
- result = build_mapped_resource(dc, context, dc_stream);
-
- if (result == DC_OK) {
- validate_guaranteed_copy_streams(
- context, dc->caps.max_streams);
- result = resource_build_scaling_params_for_context(dc, context);
- }
- if (result == DC_OK && !dcn_validate_bandwidth(dc, context))
- return DC_FAIL_BANDWIDTH_VALIDATE;
-
- return result;
-}
-
static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer(
struct dc_state *context,
const struct resource_pool *pool,
@@ -978,235 +942,16 @@ static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer(
return idle_pipe;
}
-enum dcc_control {
- dcc_control__256_256_xxx,
- dcc_control__128_128_xxx,
- dcc_control__256_64_64,
-};
-
-enum segment_order {
- segment_order__na,
- segment_order__contiguous,
- segment_order__non_contiguous,
-};
-
-static bool dcc_support_pixel_format(
- enum surface_pixel_format format,
- unsigned int *bytes_per_element)
-{
- /* DML: get_bytes_per_element */
- switch (format) {
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
- case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
- *bytes_per_element = 2;
- return true;
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
- case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
- case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
- *bytes_per_element = 4;
- return true;
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
- case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
- case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
- *bytes_per_element = 8;
- return true;
- default:
- return false;
- }
-}
-
-static bool dcc_support_swizzle(
- enum swizzle_mode_values swizzle,
- unsigned int bytes_per_element,
- enum segment_order *segment_order_horz,
- enum segment_order *segment_order_vert)
-{
- bool standard_swizzle = false;
- bool display_swizzle = false;
-
- switch (swizzle) {
- case DC_SW_4KB_S:
- case DC_SW_64KB_S:
- case DC_SW_VAR_S:
- case DC_SW_4KB_S_X:
- case DC_SW_64KB_S_X:
- case DC_SW_VAR_S_X:
- standard_swizzle = true;
- break;
- case DC_SW_4KB_D:
- case DC_SW_64KB_D:
- case DC_SW_VAR_D:
- case DC_SW_4KB_D_X:
- case DC_SW_64KB_D_X:
- case DC_SW_VAR_D_X:
- display_swizzle = true;
- break;
- default:
- break;
- }
-
- if (bytes_per_element == 1 && standard_swizzle) {
- *segment_order_horz = segment_order__contiguous;
- *segment_order_vert = segment_order__na;
- return true;
- }
- if (bytes_per_element == 2 && standard_swizzle) {
- *segment_order_horz = segment_order__non_contiguous;
- *segment_order_vert = segment_order__contiguous;
- return true;
- }
- if (bytes_per_element == 4 && standard_swizzle) {
- *segment_order_horz = segment_order__non_contiguous;
- *segment_order_vert = segment_order__contiguous;
- return true;
- }
- if (bytes_per_element == 8 && standard_swizzle) {
- *segment_order_horz = segment_order__na;
- *segment_order_vert = segment_order__contiguous;
- return true;
- }
- if (bytes_per_element == 8 && display_swizzle) {
- *segment_order_horz = segment_order__contiguous;
- *segment_order_vert = segment_order__non_contiguous;
- return true;
- }
-
- return false;
-}
-
-static void get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height,
- unsigned int bytes_per_element)
-{
- /* copied from DML. might want to refactor DML to leverage from DML */
- /* DML : get_blk256_size */
- if (bytes_per_element == 1) {
- *blk256_width = 16;
- *blk256_height = 16;
- } else if (bytes_per_element == 2) {
- *blk256_width = 16;
- *blk256_height = 8;
- } else if (bytes_per_element == 4) {
- *blk256_width = 8;
- *blk256_height = 8;
- } else if (bytes_per_element == 8) {
- *blk256_width = 8;
- *blk256_height = 4;
- }
-}
-
-static void det_request_size(
- unsigned int height,
- unsigned int width,
- unsigned int bpe,
- bool *req128_horz_wc,
- bool *req128_vert_wc)
-{
- unsigned int detile_buf_size = 164 * 1024; /* 164KB for DCN1.0 */
-
- unsigned int blk256_height = 0;
- unsigned int blk256_width = 0;
- unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc;
-
- get_blk256_size(&blk256_width, &blk256_height, bpe);
-
- swath_bytes_horz_wc = height * blk256_height * bpe;
- swath_bytes_vert_wc = width * blk256_width * bpe;
-
- *req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ?
- false : /* full 256B request */
- true; /* half 128b request */
-
- *req128_vert_wc = (2 * swath_bytes_vert_wc <= detile_buf_size) ?
- false : /* full 256B request */
- true; /* half 128b request */
-}
-
-static bool get_dcc_compression_cap(const struct dc *dc,
+static bool dcn10_get_dcc_compression_cap(const struct dc *dc,
const struct dc_dcc_surface_param *input,
struct dc_surface_dcc_cap *output)
{
- /* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */
- enum dcc_control dcc_control;
- unsigned int bpe;
- enum segment_order segment_order_horz, segment_order_vert;
- bool req128_horz_wc, req128_vert_wc;
-
- memset(output, 0, sizeof(*output));
-
- if (dc->debug.disable_dcc == DCC_DISABLE)
- return false;
-
- if (!dcc_support_pixel_format(input->format,
- &bpe))
- return false;
-
- if (!dcc_support_swizzle(input->swizzle_mode, bpe,
- &segment_order_horz, &segment_order_vert))
- return false;
-
- det_request_size(input->surface_size.height, input->surface_size.width,
- bpe, &req128_horz_wc, &req128_vert_wc);
-
- if (!req128_horz_wc && !req128_vert_wc) {
- dcc_control = dcc_control__256_256_xxx;
- } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
- if (!req128_horz_wc)
- dcc_control = dcc_control__256_256_xxx;
- else if (segment_order_horz == segment_order__contiguous)
- dcc_control = dcc_control__128_128_xxx;
- else
- dcc_control = dcc_control__256_64_64;
- } else if (input->scan == SCAN_DIRECTION_VERTICAL) {
- if (!req128_vert_wc)
- dcc_control = dcc_control__256_256_xxx;
- else if (segment_order_vert == segment_order__contiguous)
- dcc_control = dcc_control__128_128_xxx;
- else
- dcc_control = dcc_control__256_64_64;
- } else {
- if ((req128_horz_wc &&
- segment_order_horz == segment_order__non_contiguous) ||
- (req128_vert_wc &&
- segment_order_vert == segment_order__non_contiguous))
- /* access_dir not known, must use most constraining */
- dcc_control = dcc_control__256_64_64;
- else
- /* reg128 is true for either horz and vert
- * but segment_order is contiguous
- */
- dcc_control = dcc_control__128_128_xxx;
- }
-
- if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE &&
- dcc_control != dcc_control__256_256_xxx)
- return false;
-
- switch (dcc_control) {
- case dcc_control__256_256_xxx:
- output->grph.rgb.max_uncompressed_blk_size = 256;
- output->grph.rgb.max_compressed_blk_size = 256;
- output->grph.rgb.independent_64b_blks = false;
- break;
- case dcc_control__128_128_xxx:
- output->grph.rgb.max_uncompressed_blk_size = 128;
- output->grph.rgb.max_compressed_blk_size = 128;
- output->grph.rgb.independent_64b_blks = false;
- break;
- case dcc_control__256_64_64:
- output->grph.rgb.max_uncompressed_blk_size = 256;
- output->grph.rgb.max_compressed_blk_size = 64;
- output->grph.rgb.independent_64b_blks = true;
- break;
- }
-
- output->capable = true;
- output->const_color_support = false;
-
- return true;
+ return dc->res_pool->hubbub->funcs->get_dcc_compression_cap(
+ dc->res_pool->hubbub,
+ input,
+ output);
}
-
static void dcn10_destroy_resource_pool(struct resource_pool **pool)
{
struct dcn10_resource_pool *dcn10_pool = TO_DCN10_RES_POOL(*pool);
@@ -1227,13 +972,12 @@ static enum dc_status dcn10_validate_plane(const struct dc_plane_state *plane_st
}
static struct dc_cap_funcs cap_funcs = {
- .get_dcc_compression_cap = get_dcc_compression_cap
+ .get_dcc_compression_cap = dcn10_get_dcc_compression_cap
};
static struct resource_funcs dcn10_res_pool_funcs = {
.destroy = dcn10_destroy_resource_pool,
.link_enc_create = dcn10_link_encoder_create,
- .validate_guaranteed = dcn10_validate_guaranteed,
.validate_bandwidth = dcn_validate_bandwidth,
.acquire_idle_pipe_for_layer = dcn10_acquire_idle_pipe_for_layer,
.validate_plane = dcn10_validate_plane,
@@ -1282,6 +1026,7 @@ static bool construct(
dc->caps.max_cursor_size = 256;
dc->caps.max_slave_planes = 1;
dc->caps.is_apu = true;
+ dc->caps.post_blend_color_processing = false;
if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
dc->debug = debug_defaults_drv;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
new file mode 100644
index 000000000000..653b7b2efe2e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -0,0 +1,1490 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dc_bios_types.h"
+#include "dcn10_stream_encoder.h"
+#include "reg_helper.h"
+#include "hw_shared.h"
+
+#define DC_LOGGER \
+ enc1->base.ctx->logger
+
+
+#define REG(reg)\
+ (enc1->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ enc1->se_shift->field_name, enc1->se_mask->field_name
+
+#define VBI_LINE_0 0
+#define DP_BLANK_MAX_RETRY 20
+#define HDMI_CLOCK_CHANNEL_RATE_MORE_340M 340000
+
+
+enum {
+ DP_MST_UPDATE_MAX_RETRY = 50
+};
+
+#define CTX \
+ enc1->base.ctx
+
+void enc1_update_generic_info_packet(
+ struct dcn10_stream_encoder *enc1,
+ uint32_t packet_index,
+ const struct dc_info_packet *info_packet)
+{
+ uint32_t regval;
+ /* TODOFPGA Figure out a proper number for max_retries polling for lock
+ * use 50 for now.
+ */
+ uint32_t max_retries = 50;
+
+ /*we need turn on clock before programming AFMT block*/
+ REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1);
+
+ if (packet_index >= 8)
+ ASSERT(0);
+
+ /* poll dig_update_lock is not locked -> asic internal signal
+ * assume otg master lock will unlock it
+ */
+/* REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_LOCK_STATUS,
+ 0, 10, max_retries);*/
+
+ /* check if HW reading GSP memory */
+ REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT,
+ 0, 10, max_retries);
+
+ /* HW does is not reading GSP memory not reading too long ->
+ * something wrong. clear GPS memory access and notify?
+ * hw SW is writing to GSP memory
+ */
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1);
+
+ /* choose which generic packet to use */
+ regval = REG_READ(AFMT_VBI_PACKET_CONTROL);
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL,
+ AFMT_GENERIC_INDEX, packet_index);
+
+ /* write generic packet header
+ * (4th byte is for GENERIC0 only)
+ */
+ REG_SET_4(AFMT_GENERIC_HDR, 0,
+ AFMT_GENERIC_HB0, info_packet->hb0,
+ AFMT_GENERIC_HB1, info_packet->hb1,
+ AFMT_GENERIC_HB2, info_packet->hb2,
+ AFMT_GENERIC_HB3, info_packet->hb3);
+
+ /* write generic packet contents
+ * (we never use last 4 bytes)
+ * there are 8 (0-7) mmDIG0_AFMT_GENERIC0_x registers
+ */
+ {
+ const uint32_t *content =
+ (const uint32_t *) &info_packet->sb[0];
+
+ REG_WRITE(AFMT_GENERIC_0, *content++);
+ REG_WRITE(AFMT_GENERIC_1, *content++);
+ REG_WRITE(AFMT_GENERIC_2, *content++);
+ REG_WRITE(AFMT_GENERIC_3, *content++);
+ REG_WRITE(AFMT_GENERIC_4, *content++);
+ REG_WRITE(AFMT_GENERIC_5, *content++);
+ REG_WRITE(AFMT_GENERIC_6, *content++);
+ REG_WRITE(AFMT_GENERIC_7, *content);
+ }
+
+ switch (packet_index) {
+ case 0:
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+ AFMT_GENERIC0_FRAME_UPDATE, 1);
+ break;
+ case 1:
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+ AFMT_GENERIC1_FRAME_UPDATE, 1);
+ break;
+ case 2:
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+ AFMT_GENERIC2_FRAME_UPDATE, 1);
+ break;
+ case 3:
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+ AFMT_GENERIC3_FRAME_UPDATE, 1);
+ break;
+ case 4:
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+ AFMT_GENERIC4_FRAME_UPDATE, 1);
+ break;
+ case 5:
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+ AFMT_GENERIC5_FRAME_UPDATE, 1);
+ break;
+ case 6:
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+ AFMT_GENERIC6_FRAME_UPDATE, 1);
+ break;
+ case 7:
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+ AFMT_GENERIC7_FRAME_UPDATE, 1);
+ break;
+ default:
+ break;
+ }
+}
+
+static void enc1_update_hdmi_info_packet(
+ struct dcn10_stream_encoder *enc1,
+ uint32_t packet_index,
+ const struct dc_info_packet *info_packet)
+{
+ uint32_t cont, send, line;
+
+ if (info_packet->valid) {
+ enc1_update_generic_info_packet(
+ enc1,
+ packet_index,
+ info_packet);
+
+ /* enable transmission of packet(s) -
+ * packet transmission begins on the next frame
+ */
+ cont = 1;
+ /* send packet(s) every frame */
+ send = 1;
+ /* select line number to send packets on */
+ line = 2;
+ } else {
+ cont = 0;
+ send = 0;
+ line = 0;
+ }
+
+ /* choose which generic packet control to use */
+ switch (packet_index) {
+ case 0:
+ REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL0,
+ HDMI_GENERIC0_CONT, cont,
+ HDMI_GENERIC0_SEND, send,
+ HDMI_GENERIC0_LINE, line);
+ break;
+ case 1:
+ REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL0,
+ HDMI_GENERIC1_CONT, cont,
+ HDMI_GENERIC1_SEND, send,
+ HDMI_GENERIC1_LINE, line);
+ break;
+ case 2:
+ REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL1,
+ HDMI_GENERIC0_CONT, cont,
+ HDMI_GENERIC0_SEND, send,
+ HDMI_GENERIC0_LINE, line);
+ break;
+ case 3:
+ REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL1,
+ HDMI_GENERIC1_CONT, cont,
+ HDMI_GENERIC1_SEND, send,
+ HDMI_GENERIC1_LINE, line);
+ break;
+ case 4:
+ REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL2,
+ HDMI_GENERIC0_CONT, cont,
+ HDMI_GENERIC0_SEND, send,
+ HDMI_GENERIC0_LINE, line);
+ break;
+ case 5:
+ REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL2,
+ HDMI_GENERIC1_CONT, cont,
+ HDMI_GENERIC1_SEND, send,
+ HDMI_GENERIC1_LINE, line);
+ break;
+ case 6:
+ REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL3,
+ HDMI_GENERIC0_CONT, cont,
+ HDMI_GENERIC0_SEND, send,
+ HDMI_GENERIC0_LINE, line);
+ break;
+ case 7:
+ REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL3,
+ HDMI_GENERIC1_CONT, cont,
+ HDMI_GENERIC1_SEND, send,
+ HDMI_GENERIC1_LINE, line);
+ break;
+ default:
+ /* invalid HW packet index */
+ DC_LOG_WARNING(
+ "Invalid HW packet index: %s()\n",
+ __func__);
+ return;
+ }
+}
+
+/* setup stream encoder in dp mode */
+void enc1_stream_encoder_dp_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ enum dc_color_space output_color_space)
+{
+ uint32_t h_active_start;
+ uint32_t v_active_start;
+ uint32_t misc0 = 0;
+ uint32_t misc1 = 0;
+ uint32_t h_blank;
+ uint32_t h_back_porch;
+ uint8_t synchronous_clock = 0; /* asynchronous mode */
+ uint8_t colorimetry_bpc;
+ uint8_t dynamic_range_rgb = 0; /*full range*/
+ uint8_t dynamic_range_ycbcr = 1; /*bt709*/
+
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ REG_UPDATE(DP_DB_CNTL, DP_DB_DISABLE, 1);
+
+ /* set pixel encoding */
+ switch (crtc_timing->pixel_encoding) {
+ case PIXEL_ENCODING_YCBCR422:
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
+ DP_PIXEL_ENCODING_TYPE_YCBCR422);
+ break;
+ case PIXEL_ENCODING_YCBCR444:
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
+ DP_PIXEL_ENCODING_TYPE_YCBCR444);
+
+ if (crtc_timing->flags.Y_ONLY)
+ if (crtc_timing->display_color_depth != COLOR_DEPTH_666)
+ /* HW testing only, no use case yet.
+ * Color depth of Y-only could be
+ * 8, 10, 12, 16 bits
+ */
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
+ DP_PIXEL_ENCODING_TYPE_Y_ONLY);
+ /* Note: DP_MSA_MISC1 bit 7 is the indicator
+ * of Y-only mode.
+ * This bit is set in HW if register
+ * DP_PIXEL_ENCODING is programmed to 0x4
+ */
+ break;
+ case PIXEL_ENCODING_YCBCR420:
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
+ DP_PIXEL_ENCODING_TYPE_YCBCR420);
+ REG_UPDATE(DP_VID_TIMING, DP_VID_N_MUL, 1);
+ break;
+ default:
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
+ DP_PIXEL_ENCODING_TYPE_RGB444);
+ break;
+ }
+
+ misc1 = REG_READ(DP_MSA_MISC);
+
+ /* set color depth */
+
+ switch (crtc_timing->display_color_depth) {
+ case COLOR_DEPTH_666:
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
+ 0);
+ break;
+ case COLOR_DEPTH_888:
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
+ DP_COMPONENT_PIXEL_DEPTH_8BPC);
+ break;
+ case COLOR_DEPTH_101010:
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
+ DP_COMPONENT_PIXEL_DEPTH_10BPC);
+
+ break;
+ case COLOR_DEPTH_121212:
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
+ DP_COMPONENT_PIXEL_DEPTH_12BPC);
+ break;
+ default:
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
+ DP_COMPONENT_PIXEL_DEPTH_6BPC);
+ break;
+ }
+
+ /* set dynamic range and YCbCr range */
+
+ switch (crtc_timing->display_color_depth) {
+ case COLOR_DEPTH_666:
+ colorimetry_bpc = 0;
+ break;
+ case COLOR_DEPTH_888:
+ colorimetry_bpc = 1;
+ break;
+ case COLOR_DEPTH_101010:
+ colorimetry_bpc = 2;
+ break;
+ case COLOR_DEPTH_121212:
+ colorimetry_bpc = 3;
+ break;
+ default:
+ colorimetry_bpc = 0;
+ break;
+ }
+
+ misc0 = misc0 | synchronous_clock;
+ misc0 = colorimetry_bpc << 5;
+
+ switch (output_color_space) {
+ case COLOR_SPACE_SRGB:
+ misc0 = misc0 | 0x0;
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ dynamic_range_rgb = 0; /*full range*/
+ break;
+ case COLOR_SPACE_SRGB_LIMITED:
+ misc0 = misc0 | 0x8; /* bit3=1 */
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ dynamic_range_rgb = 1; /*limited range*/
+ break;
+ case COLOR_SPACE_YCBCR601:
+ case COLOR_SPACE_YCBCR601_LIMITED:
+ misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ dynamic_range_ycbcr = 0; /*bt601*/
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+ misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */
+ else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444)
+ misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */
+ break;
+ case COLOR_SPACE_YCBCR709:
+ case COLOR_SPACE_YCBCR709_LIMITED:
+ misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ dynamic_range_ycbcr = 1; /*bt709*/
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+ misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */
+ else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444)
+ misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */
+ break;
+ case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
+ dynamic_range_rgb = 1; /*limited range*/
+ break;
+ case COLOR_SPACE_2020_RGB_FULLRANGE:
+ case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_XR_RGB:
+ case COLOR_SPACE_MSREF_SCRGB:
+ case COLOR_SPACE_ADOBERGB:
+ case COLOR_SPACE_DCIP3:
+ case COLOR_SPACE_XV_YCC_709:
+ case COLOR_SPACE_XV_YCC_601:
+ case COLOR_SPACE_DISPLAYNATIVE:
+ case COLOR_SPACE_DOLBYVISION:
+ case COLOR_SPACE_APPCTRL:
+ case COLOR_SPACE_CUSTOMPOINTS:
+ case COLOR_SPACE_UNKNOWN:
+ /* do nothing */
+ break;
+ }
+
+ REG_SET(DP_MSA_COLORIMETRY, 0, DP_MSA_MISC0, misc0);
+ REG_WRITE(DP_MSA_MISC, misc1); /* MSA_MISC1 */
+
+ /* dcn new register
+ * dc_crtc_timing is vesa dmt struct. data from edid
+ */
+ REG_SET_2(DP_MSA_TIMING_PARAM1, 0,
+ DP_MSA_HTOTAL, crtc_timing->h_total,
+ DP_MSA_VTOTAL, crtc_timing->v_total);
+
+ /* calculate from vesa timing parameters
+ * h_active_start related to leading edge of sync
+ */
+
+ h_blank = crtc_timing->h_total - crtc_timing->h_border_left -
+ crtc_timing->h_addressable - crtc_timing->h_border_right;
+
+ h_back_porch = h_blank - crtc_timing->h_front_porch -
+ crtc_timing->h_sync_width;
+
+ /* start at beginning of left border */
+ h_active_start = crtc_timing->h_sync_width + h_back_porch;
+
+
+ v_active_start = crtc_timing->v_total - crtc_timing->v_border_top -
+ crtc_timing->v_addressable - crtc_timing->v_border_bottom -
+ crtc_timing->v_front_porch;
+
+
+ /* start at beginning of left border */
+ REG_SET_2(DP_MSA_TIMING_PARAM2, 0,
+ DP_MSA_HSTART, h_active_start,
+ DP_MSA_VSTART, v_active_start);
+
+ REG_SET_4(DP_MSA_TIMING_PARAM3, 0,
+ DP_MSA_HSYNCWIDTH,
+ crtc_timing->h_sync_width,
+ DP_MSA_HSYNCPOLARITY,
+ !crtc_timing->flags.HSYNC_POSITIVE_POLARITY,
+ DP_MSA_VSYNCWIDTH,
+ crtc_timing->v_sync_width,
+ DP_MSA_VSYNCPOLARITY,
+ !crtc_timing->flags.VSYNC_POSITIVE_POLARITY);
+
+ /* HWDITH include border or overscan */
+ REG_SET_2(DP_MSA_TIMING_PARAM4, 0,
+ DP_MSA_HWIDTH, crtc_timing->h_border_left +
+ crtc_timing->h_addressable + crtc_timing->h_border_right,
+ DP_MSA_VHEIGHT, crtc_timing->v_border_top +
+ crtc_timing->v_addressable + crtc_timing->v_border_bottom);
+}
+
+static void enc1_stream_encoder_set_stream_attribute_helper(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_crtc_timing *crtc_timing)
+{
+ switch (crtc_timing->pixel_encoding) {
+ case PIXEL_ENCODING_YCBCR422:
+ REG_UPDATE(DIG_FE_CNTL, TMDS_PIXEL_ENCODING, 1);
+ break;
+ default:
+ REG_UPDATE(DIG_FE_CNTL, TMDS_PIXEL_ENCODING, 0);
+ break;
+ }
+ REG_UPDATE(DIG_FE_CNTL, TMDS_COLOR_FORMAT, 0);
+}
+
+/* setup stream encoder in hdmi mode */
+void enc1_stream_encoder_hdmi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ int actual_pix_clk_khz,
+ bool enable_audio)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A;
+ cntl.enable_dp_audio = enable_audio;
+ cntl.pixel_clock = actual_pix_clk_khz;
+ cntl.lanes_number = LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return;
+
+ enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+
+ /* setup HDMI engine */
+ REG_UPDATE_5(HDMI_CONTROL,
+ HDMI_PACKET_GEN_VERSION, 1,
+ HDMI_KEEPOUT_MODE, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0,
+ HDMI_DATA_SCRAMBLE_EN, 0,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+
+
+ switch (crtc_timing->display_color_depth) {
+ case COLOR_DEPTH_888:
+ REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
+ break;
+ case COLOR_DEPTH_101010:
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_121212:
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_161616:
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 3,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ break;
+ default:
+ break;
+ }
+
+ if (actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M) {
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_RATE_MORE_340M
+ * Clock channel frequency is 1/4 of character rate.
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 1);
+ } else if (crtc_timing->flags.LTE_340MCSC_SCRAMBLE) {
+
+ /* TODO: New feature for DCE11, still need to implement */
+
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_FREQ_EQUAL_TO_CHAR_RATE
+ * Clock channel frequency is the same
+ * as character rate
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+ }
+
+
+ REG_UPDATE_3(HDMI_VBI_PACKET_CONTROL,
+ HDMI_GC_CONT, 1,
+ HDMI_GC_SEND, 1,
+ HDMI_NULL_SEND, 1);
+
+ /* following belongs to audio */
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+
+ REG_UPDATE(AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE,
+ VBI_LINE_0 + 2);
+
+ REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0);
+}
+
+/* setup stream encoder in dvi mode */
+void enc1_stream_encoder_dvi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ bool is_dual_link)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = is_dual_link ?
+ SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK;
+ cntl.enable_dp_audio = false;
+ cntl.pixel_clock = crtc_timing->pix_clk_khz;
+ cntl.lanes_number = (is_dual_link) ? LANE_COUNT_EIGHT : LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return;
+
+ ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB);
+ ASSERT(crtc_timing->display_color_depth == COLOR_DEPTH_888);
+ enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+}
+
+void enc1_stream_encoder_set_mst_bandwidth(
+ struct stream_encoder *enc,
+ struct fixed31_32 avg_time_slots_per_mtp)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t x = dc_fixpt_floor(
+ avg_time_slots_per_mtp);
+ uint32_t y = dc_fixpt_ceil(
+ dc_fixpt_shl(
+ dc_fixpt_sub_int(
+ avg_time_slots_per_mtp,
+ x),
+ 26));
+
+ REG_SET_2(DP_MSE_RATE_CNTL, 0,
+ DP_MSE_RATE_X, x,
+ DP_MSE_RATE_Y, y);
+
+ /* wait for update to be completed on the link */
+ /* i.e. DP_MSE_RATE_UPDATE_PENDING field (read only) */
+ /* is reset to 0 (not pending) */
+ REG_WAIT(DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING,
+ 0,
+ 10, DP_MST_UPDATE_MAX_RETRY);
+}
+
+static void enc1_stream_encoder_update_hdmi_info_packets(
+ struct stream_encoder *enc,
+ const struct encoder_info_frame *info_frame)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ /* for bring up, disable dp double TODO */
+ REG_UPDATE(HDMI_DB_CONTROL, HDMI_DB_DISABLE, 1);
+
+ enc1_update_hdmi_info_packet(enc1, 0, &info_frame->avi);
+ enc1_update_hdmi_info_packet(enc1, 1, &info_frame->vendor);
+ enc1_update_hdmi_info_packet(enc1, 2, &info_frame->gamut);
+ enc1_update_hdmi_info_packet(enc1, 3, &info_frame->spd);
+ enc1_update_hdmi_info_packet(enc1, 4, &info_frame->hdrsmd);
+}
+
+static void enc1_stream_encoder_stop_hdmi_info_packets(
+ struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ /* stop generic packets 0 & 1 on HDMI */
+ REG_SET_6(HDMI_GENERIC_PACKET_CONTROL0, 0,
+ HDMI_GENERIC1_CONT, 0,
+ HDMI_GENERIC1_LINE, 0,
+ HDMI_GENERIC1_SEND, 0,
+ HDMI_GENERIC0_CONT, 0,
+ HDMI_GENERIC0_LINE, 0,
+ HDMI_GENERIC0_SEND, 0);
+
+ /* stop generic packets 2 & 3 on HDMI */
+ REG_SET_6(HDMI_GENERIC_PACKET_CONTROL1, 0,
+ HDMI_GENERIC0_CONT, 0,
+ HDMI_GENERIC0_LINE, 0,
+ HDMI_GENERIC0_SEND, 0,
+ HDMI_GENERIC1_CONT, 0,
+ HDMI_GENERIC1_LINE, 0,
+ HDMI_GENERIC1_SEND, 0);
+
+ /* stop generic packets 2 & 3 on HDMI */
+ REG_SET_6(HDMI_GENERIC_PACKET_CONTROL2, 0,
+ HDMI_GENERIC0_CONT, 0,
+ HDMI_GENERIC0_LINE, 0,
+ HDMI_GENERIC0_SEND, 0,
+ HDMI_GENERIC1_CONT, 0,
+ HDMI_GENERIC1_LINE, 0,
+ HDMI_GENERIC1_SEND, 0);
+
+ REG_SET_6(HDMI_GENERIC_PACKET_CONTROL3, 0,
+ HDMI_GENERIC0_CONT, 0,
+ HDMI_GENERIC0_LINE, 0,
+ HDMI_GENERIC0_SEND, 0,
+ HDMI_GENERIC1_CONT, 0,
+ HDMI_GENERIC1_LINE, 0,
+ HDMI_GENERIC1_SEND, 0);
+}
+
+void enc1_stream_encoder_update_dp_info_packets(
+ struct stream_encoder *enc,
+ const struct encoder_info_frame *info_frame)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t value = 0;
+
+ if (info_frame->vsc.valid)
+ enc1_update_generic_info_packet(
+ enc1,
+ 0, /* packetIndex */
+ &info_frame->vsc);
+
+ if (info_frame->spd.valid)
+ enc1_update_generic_info_packet(
+ enc1,
+ 2, /* packetIndex */
+ &info_frame->spd);
+
+ if (info_frame->hdrsmd.valid)
+ enc1_update_generic_info_packet(
+ enc1,
+ 3, /* packetIndex */
+ &info_frame->hdrsmd);
+
+ /* enable/disable transmission of packet(s).
+ * If enabled, packet transmission begins on the next frame
+ */
+ REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid);
+ REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid);
+ REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid);
+
+
+ /* This bit is the master enable bit.
+ * When enabling secondary stream engine,
+ * this master bit must also be set.
+ * This register shared with audio info frame.
+ * Therefore we need to enable master bit
+ * if at least on of the fields is not 0
+ */
+ value = REG_READ(DP_SEC_CNTL);
+ if (value)
+ REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+void enc1_stream_encoder_stop_dp_info_packets(
+ struct stream_encoder *enc)
+{
+ /* stop generic packets on DP */
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t value = 0;
+
+ REG_SET_10(DP_SEC_CNTL, 0,
+ DP_SEC_GSP0_ENABLE, 0,
+ DP_SEC_GSP1_ENABLE, 0,
+ DP_SEC_GSP2_ENABLE, 0,
+ DP_SEC_GSP3_ENABLE, 0,
+ DP_SEC_GSP4_ENABLE, 0,
+ DP_SEC_GSP5_ENABLE, 0,
+ DP_SEC_GSP6_ENABLE, 0,
+ DP_SEC_GSP7_ENABLE, 0,
+ DP_SEC_MPG_ENABLE, 0,
+ DP_SEC_STREAM_ENABLE, 0);
+
+ /* this register shared with audio info frame.
+ * therefore we need to keep master enabled
+ * if at least one of the fields is not 0 */
+ value = REG_READ(DP_SEC_CNTL);
+ if (value)
+ REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+
+}
+
+void enc1_stream_encoder_dp_blank(
+ struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t retries = 0;
+ uint32_t reg1 = 0;
+ uint32_t max_retries = DP_BLANK_MAX_RETRY * 10;
+
+ /* Note: For CZ, we are changing driver default to disable
+ * stream deferred to next VBLANK. If results are positive, we
+ * will make the same change to all DCE versions. There are a
+ * handful of panels that cannot handle disable stream at
+ * HBLANK and will result in a white line flash across the
+ * screen on stream disable.
+ */
+ REG_GET(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, &reg1);
+ if ((reg1 & 0x1) == 0)
+ /*stream not enabled*/
+ return;
+ /* Specify the video stream disable point
+ * (2 = start of the next vertical blank)
+ */
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, 2);
+ /* Larger delay to wait until VBLANK - use max retry of
+ * 10us*3000=30ms. This covers 16.6ms of typical 60 Hz mode +
+ * a little more because we may not trust delay accuracy.
+ */
+ max_retries = DP_BLANK_MAX_RETRY * 150;
+
+ /* disable DP stream */
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0);
+
+ /* the encoder stops sending the video stream
+ * at the start of the vertical blanking.
+ * Poll for DP_VID_STREAM_STATUS == 0
+ */
+
+ REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS,
+ 0,
+ 10, max_retries);
+
+ ASSERT(retries <= max_retries);
+
+ /* Tell the DP encoder to ignore timing from CRTC, must be done after
+ * the polling. If we set DP_STEER_FIFO_RESET before DP stream blank is
+ * complete, stream status will be stuck in video stream enabled state,
+ * i.e. DP_VID_STREAM_STATUS stuck at 1.
+ */
+
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, true);
+}
+
+/* output video stream to link encoder */
+void enc1_stream_encoder_dp_unblank(
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) {
+ uint32_t n_vid = 0x8000;
+ uint32_t m_vid;
+
+ /* M / N = Fstream / Flink
+ * m_vid / n_vid = pixel rate / link rate
+ */
+
+ uint64_t m_vid_l = n_vid;
+
+ m_vid_l *= param->pixel_clk_khz;
+ m_vid_l = div_u64(m_vid_l,
+ param->link_settings.link_rate
+ * LINK_RATE_REF_FREQ_IN_KHZ);
+
+ m_vid = (uint32_t) m_vid_l;
+
+ /* enable auto measurement */
+
+ REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 0);
+
+ /* auto measurement need 1 full 0x8000 symbol cycle to kick in,
+ * therefore program initial value for Mvid and Nvid
+ */
+
+ REG_UPDATE(DP_VID_N, DP_VID_N, n_vid);
+
+ REG_UPDATE(DP_VID_M, DP_VID_M, m_vid);
+
+ REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 1);
+ }
+
+ /* set DIG_START to 0x1 to resync FIFO */
+
+ REG_UPDATE(DIG_FE_CNTL, DIG_START, 1);
+
+ /* switch DP encoder to CRTC data */
+
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0);
+
+ /* wait 100us for DIG/DP logic to prime
+ * (i.e. a few video lines)
+ */
+ udelay(100);
+
+ /* the hardware would start sending video at the start of the next DP
+ * frame (i.e. rising edge of the vblank).
+ * NOTE: We used to program DP_VID_STREAM_DIS_DEFER = 2 here, but this
+ * register has no effect on enable transition! HW always guarantees
+ * VID_STREAM enable at start of next frame, and this is not
+ * programmable
+ */
+
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true);
+}
+
+void enc1_stream_encoder_set_avmute(
+ struct stream_encoder *enc,
+ bool enable)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ unsigned int value = enable ? 1 : 0;
+
+ REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, value);
+}
+
+
+#define DP_SEC_AUD_N__DP_SEC_AUD_N__DEFAULT 0x8000
+#define DP_SEC_TIMESTAMP__DP_SEC_TIMESTAMP_MODE__AUTO_CALC 1
+
+#include "include/audio_types.h"
+
+/**
+* speakersToChannels
+*
+* @brief
+* translate speakers to channels
+*
+* FL - Front Left
+* FR - Front Right
+* RL - Rear Left
+* RR - Rear Right
+* RC - Rear Center
+* FC - Front Center
+* FLC - Front Left Center
+* FRC - Front Right Center
+* RLC - Rear Left Center
+* RRC - Rear Right Center
+* LFE - Low Freq Effect
+*
+* FC
+* FLC FRC
+* FL FR
+*
+* LFE
+* ()
+*
+*
+* RL RR
+* RLC RRC
+* RC
+*
+* ch 8 7 6 5 4 3 2 1
+* 0b00000011 - - - - - - FR FL
+* 0b00000111 - - - - - LFE FR FL
+* 0b00001011 - - - - FC - FR FL
+* 0b00001111 - - - - FC LFE FR FL
+* 0b00010011 - - - RC - - FR FL
+* 0b00010111 - - - RC - LFE FR FL
+* 0b00011011 - - - RC FC - FR FL
+* 0b00011111 - - - RC FC LFE FR FL
+* 0b00110011 - - RR RL - - FR FL
+* 0b00110111 - - RR RL - LFE FR FL
+* 0b00111011 - - RR RL FC - FR FL
+* 0b00111111 - - RR RL FC LFE FR FL
+* 0b01110011 - RC RR RL - - FR FL
+* 0b01110111 - RC RR RL - LFE FR FL
+* 0b01111011 - RC RR RL FC - FR FL
+* 0b01111111 - RC RR RL FC LFE FR FL
+* 0b11110011 RRC RLC RR RL - - FR FL
+* 0b11110111 RRC RLC RR RL - LFE FR FL
+* 0b11111011 RRC RLC RR RL FC - FR FL
+* 0b11111111 RRC RLC RR RL FC LFE FR FL
+* 0b11000011 FRC FLC - - - - FR FL
+* 0b11000111 FRC FLC - - - LFE FR FL
+* 0b11001011 FRC FLC - - FC - FR FL
+* 0b11001111 FRC FLC - - FC LFE FR FL
+* 0b11010011 FRC FLC - RC - - FR FL
+* 0b11010111 FRC FLC - RC - LFE FR FL
+* 0b11011011 FRC FLC - RC FC - FR FL
+* 0b11011111 FRC FLC - RC FC LFE FR FL
+* 0b11110011 FRC FLC RR RL - - FR FL
+* 0b11110111 FRC FLC RR RL - LFE FR FL
+* 0b11111011 FRC FLC RR RL FC - FR FL
+* 0b11111111 FRC FLC RR RL FC LFE FR FL
+*
+* @param
+* speakers - speaker information as it comes from CEA audio block
+*/
+/* translate speakers to channels */
+
+union audio_cea_channels {
+ uint8_t all;
+ struct audio_cea_channels_bits {
+ uint32_t FL:1;
+ uint32_t FR:1;
+ uint32_t LFE:1;
+ uint32_t FC:1;
+ uint32_t RL_RC:1;
+ uint32_t RR:1;
+ uint32_t RC_RLC_FLC:1;
+ uint32_t RRC_FRC:1;
+ } channels;
+};
+
+struct audio_clock_info {
+ /* pixel clock frequency*/
+ uint32_t pixel_clock_in_10khz;
+ /* N - 32KHz audio */
+ uint32_t n_32khz;
+ /* CTS - 32KHz audio*/
+ uint32_t cts_32khz;
+ uint32_t n_44khz;
+ uint32_t cts_44khz;
+ uint32_t n_48khz;
+ uint32_t cts_48khz;
+};
+
+/* 25.2MHz/1.001*/
+/* 25.2MHz/1.001*/
+/* 25.2MHz*/
+/* 27MHz */
+/* 27MHz*1.001*/
+/* 27MHz*1.001*/
+/* 54MHz*/
+/* 54MHz*1.001*/
+/* 74.25MHz/1.001*/
+/* 74.25MHz*/
+/* 148.5MHz/1.001*/
+/* 148.5MHz*/
+
+static const struct audio_clock_info audio_clock_info_table[16] = {
+ {2517, 4576, 28125, 7007, 31250, 6864, 28125},
+ {2518, 4576, 28125, 7007, 31250, 6864, 28125},
+ {2520, 4096, 25200, 6272, 28000, 6144, 25200},
+ {2700, 4096, 27000, 6272, 30000, 6144, 27000},
+ {2702, 4096, 27027, 6272, 30030, 6144, 27027},
+ {2703, 4096, 27027, 6272, 30030, 6144, 27027},
+ {5400, 4096, 54000, 6272, 60000, 6144, 54000},
+ {5405, 4096, 54054, 6272, 60060, 6144, 54054},
+ {7417, 11648, 210937, 17836, 234375, 11648, 140625},
+ {7425, 4096, 74250, 6272, 82500, 6144, 74250},
+ {14835, 11648, 421875, 8918, 234375, 5824, 140625},
+ {14850, 4096, 148500, 6272, 165000, 6144, 148500},
+ {29670, 5824, 421875, 4459, 234375, 5824, 281250},
+ {29700, 3072, 222750, 4704, 247500, 5120, 247500},
+ {59340, 5824, 843750, 8918, 937500, 5824, 562500},
+ {59400, 3072, 445500, 9408, 990000, 6144, 594000}
+};
+
+static const struct audio_clock_info audio_clock_info_table_36bpc[14] = {
+ {2517, 9152, 84375, 7007, 48875, 9152, 56250},
+ {2518, 9152, 84375, 7007, 48875, 9152, 56250},
+ {2520, 4096, 37800, 6272, 42000, 6144, 37800},
+ {2700, 4096, 40500, 6272, 45000, 6144, 40500},
+ {2702, 8192, 81081, 6272, 45045, 8192, 54054},
+ {2703, 8192, 81081, 6272, 45045, 8192, 54054},
+ {5400, 4096, 81000, 6272, 90000, 6144, 81000},
+ {5405, 4096, 81081, 6272, 90090, 6144, 81081},
+ {7417, 11648, 316406, 17836, 351562, 11648, 210937},
+ {7425, 4096, 111375, 6272, 123750, 6144, 111375},
+ {14835, 11648, 632812, 17836, 703125, 11648, 421875},
+ {14850, 4096, 222750, 6272, 247500, 6144, 222750},
+ {29670, 5824, 632812, 8918, 703125, 5824, 421875},
+ {29700, 4096, 445500, 4704, 371250, 5120, 371250}
+};
+
+static const struct audio_clock_info audio_clock_info_table_48bpc[14] = {
+ {2517, 4576, 56250, 7007, 62500, 6864, 56250},
+ {2518, 4576, 56250, 7007, 62500, 6864, 56250},
+ {2520, 4096, 50400, 6272, 56000, 6144, 50400},
+ {2700, 4096, 54000, 6272, 60000, 6144, 54000},
+ {2702, 4096, 54054, 6267, 60060, 8192, 54054},
+ {2703, 4096, 54054, 6272, 60060, 8192, 54054},
+ {5400, 4096, 108000, 6272, 120000, 6144, 108000},
+ {5405, 4096, 108108, 6272, 120120, 6144, 108108},
+ {7417, 11648, 421875, 17836, 468750, 11648, 281250},
+ {7425, 4096, 148500, 6272, 165000, 6144, 148500},
+ {14835, 11648, 843750, 8918, 468750, 11648, 281250},
+ {14850, 4096, 297000, 6272, 330000, 6144, 297000},
+ {29670, 5824, 843750, 4459, 468750, 5824, 562500},
+ {29700, 3072, 445500, 4704, 495000, 5120, 495000}
+
+
+};
+
+static union audio_cea_channels speakers_to_channels(
+ struct audio_speaker_flags speaker_flags)
+{
+ union audio_cea_channels cea_channels = {0};
+
+ /* these are one to one */
+ cea_channels.channels.FL = speaker_flags.FL_FR;
+ cea_channels.channels.FR = speaker_flags.FL_FR;
+ cea_channels.channels.LFE = speaker_flags.LFE;
+ cea_channels.channels.FC = speaker_flags.FC;
+
+ /* if Rear Left and Right exist move RC speaker to channel 7
+ * otherwise to channel 5
+ */
+ if (speaker_flags.RL_RR) {
+ cea_channels.channels.RL_RC = speaker_flags.RL_RR;
+ cea_channels.channels.RR = speaker_flags.RL_RR;
+ cea_channels.channels.RC_RLC_FLC = speaker_flags.RC;
+ } else {
+ cea_channels.channels.RL_RC = speaker_flags.RC;
+ }
+
+ /* FRONT Left Right Center and REAR Left Right Center are exclusive */
+ if (speaker_flags.FLC_FRC) {
+ cea_channels.channels.RC_RLC_FLC = speaker_flags.FLC_FRC;
+ cea_channels.channels.RRC_FRC = speaker_flags.FLC_FRC;
+ } else {
+ cea_channels.channels.RC_RLC_FLC = speaker_flags.RLC_RRC;
+ cea_channels.channels.RRC_FRC = speaker_flags.RLC_RRC;
+ }
+
+ return cea_channels;
+}
+
+static uint32_t calc_max_audio_packets_per_line(
+ const struct audio_crtc_info *crtc_info)
+{
+ uint32_t max_packets_per_line;
+
+ max_packets_per_line =
+ crtc_info->h_total - crtc_info->h_active;
+
+ if (crtc_info->pixel_repetition)
+ max_packets_per_line *= crtc_info->pixel_repetition;
+
+ /* for other hdmi features */
+ max_packets_per_line -= 58;
+ /* for Control Period */
+ max_packets_per_line -= 16;
+ /* Number of Audio Packets per Line */
+ max_packets_per_line /= 32;
+
+ return max_packets_per_line;
+}
+
+static void get_audio_clock_info(
+ enum dc_color_depth color_depth,
+ uint32_t crtc_pixel_clock_in_khz,
+ uint32_t actual_pixel_clock_in_khz,
+ struct audio_clock_info *audio_clock_info)
+{
+ const struct audio_clock_info *clock_info;
+ uint32_t index;
+ uint32_t crtc_pixel_clock_in_10khz = crtc_pixel_clock_in_khz / 10;
+ uint32_t audio_array_size;
+
+ switch (color_depth) {
+ case COLOR_DEPTH_161616:
+ clock_info = audio_clock_info_table_48bpc;
+ audio_array_size = ARRAY_SIZE(
+ audio_clock_info_table_48bpc);
+ break;
+ case COLOR_DEPTH_121212:
+ clock_info = audio_clock_info_table_36bpc;
+ audio_array_size = ARRAY_SIZE(
+ audio_clock_info_table_36bpc);
+ break;
+ default:
+ clock_info = audio_clock_info_table;
+ audio_array_size = ARRAY_SIZE(
+ audio_clock_info_table);
+ break;
+ }
+
+ if (clock_info != NULL) {
+ /* search for exact pixel clock in table */
+ for (index = 0; index < audio_array_size; index++) {
+ if (clock_info[index].pixel_clock_in_10khz >
+ crtc_pixel_clock_in_10khz)
+ break; /* not match */
+ else if (clock_info[index].pixel_clock_in_10khz ==
+ crtc_pixel_clock_in_10khz) {
+ /* match found */
+ *audio_clock_info = clock_info[index];
+ return;
+ }
+ }
+ }
+
+ /* not found */
+ if (actual_pixel_clock_in_khz == 0)
+ actual_pixel_clock_in_khz = crtc_pixel_clock_in_khz;
+
+ /* See HDMI spec the table entry under
+ * pixel clock of "Other". */
+ audio_clock_info->pixel_clock_in_10khz =
+ actual_pixel_clock_in_khz / 10;
+ audio_clock_info->cts_32khz = actual_pixel_clock_in_khz;
+ audio_clock_info->cts_44khz = actual_pixel_clock_in_khz;
+ audio_clock_info->cts_48khz = actual_pixel_clock_in_khz;
+
+ audio_clock_info->n_32khz = 4096;
+ audio_clock_info->n_44khz = 6272;
+ audio_clock_info->n_48khz = 6144;
+}
+
+static void enc1_se_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *audio_info)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ uint32_t speakers = 0;
+ uint32_t channels = 0;
+
+ ASSERT(audio_info);
+ if (audio_info == NULL)
+ /* This should not happen.it does so we don't get BSOD*/
+ return;
+
+ speakers = audio_info->flags.info.ALLSPEAKERS;
+ channels = speakers_to_channels(audio_info->flags.speaker_flags).all;
+
+ /* setup the audio stream source select (audio -> dig mapping) */
+ REG_SET(AFMT_AUDIO_SRC_CONTROL, 0, AFMT_AUDIO_SRC_SELECT, az_inst);
+
+ /* Channel allocation */
+ REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, channels);
+}
+
+static void enc1_se_setup_hdmi_audio(
+ struct stream_encoder *enc,
+ const struct audio_crtc_info *crtc_info)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ struct audio_clock_info audio_clock_info = {0};
+ uint32_t max_packets_per_line;
+
+ /* For now still do calculation, although this field is ignored when
+ * above HDMI_PACKET_GEN_VERSION set to 1
+ */
+ max_packets_per_line = calc_max_audio_packets_per_line(crtc_info);
+
+ /* HDMI_AUDIO_PACKET_CONTROL */
+ REG_UPDATE_2(HDMI_AUDIO_PACKET_CONTROL,
+ HDMI_AUDIO_PACKETS_PER_LINE, max_packets_per_line,
+ HDMI_AUDIO_DELAY_EN, 1);
+
+ /* AFMT_AUDIO_PACKET_CONTROL */
+ REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+
+ /* AFMT_AUDIO_PACKET_CONTROL2 */
+ REG_UPDATE_2(AFMT_AUDIO_PACKET_CONTROL2,
+ AFMT_AUDIO_LAYOUT_OVRD, 0,
+ AFMT_60958_OSF_OVRD, 0);
+
+ /* HDMI_ACR_PACKET_CONTROL */
+ REG_UPDATE_3(HDMI_ACR_PACKET_CONTROL,
+ HDMI_ACR_AUTO_SEND, 1,
+ HDMI_ACR_SOURCE, 0,
+ HDMI_ACR_AUDIO_PRIORITY, 0);
+
+ /* Program audio clock sample/regeneration parameters */
+ get_audio_clock_info(crtc_info->color_depth,
+ crtc_info->requested_pixel_clock,
+ crtc_info->calculated_pixel_clock,
+ &audio_clock_info);
+ DC_LOG_HW_AUDIO(
+ "\n%s:Input::requested_pixel_clock = %d" \
+ "calculated_pixel_clock = %d \n", __func__, \
+ crtc_info->requested_pixel_clock, \
+ crtc_info->calculated_pixel_clock);
+
+ /* HDMI_ACR_32_0__HDMI_ACR_CTS_32_MASK */
+ REG_UPDATE(HDMI_ACR_32_0, HDMI_ACR_CTS_32, audio_clock_info.cts_32khz);
+
+ /* HDMI_ACR_32_1__HDMI_ACR_N_32_MASK */
+ REG_UPDATE(HDMI_ACR_32_1, HDMI_ACR_N_32, audio_clock_info.n_32khz);
+
+ /* HDMI_ACR_44_0__HDMI_ACR_CTS_44_MASK */
+ REG_UPDATE(HDMI_ACR_44_0, HDMI_ACR_CTS_44, audio_clock_info.cts_44khz);
+
+ /* HDMI_ACR_44_1__HDMI_ACR_N_44_MASK */
+ REG_UPDATE(HDMI_ACR_44_1, HDMI_ACR_N_44, audio_clock_info.n_44khz);
+
+ /* HDMI_ACR_48_0__HDMI_ACR_CTS_48_MASK */
+ REG_UPDATE(HDMI_ACR_48_0, HDMI_ACR_CTS_48, audio_clock_info.cts_48khz);
+
+ /* HDMI_ACR_48_1__HDMI_ACR_N_48_MASK */
+ REG_UPDATE(HDMI_ACR_48_1, HDMI_ACR_N_48, audio_clock_info.n_48khz);
+
+ /* Video driver cannot know in advance which sample rate will
+ * be used by HD Audio driver
+ * HDMI_ACR_PACKET_CONTROL__HDMI_ACR_N_MULTIPLE field is
+ * programmed below in interruppt callback
+ */
+
+ /* AFMT_60958_0__AFMT_60958_CS_CHANNEL_NUMBER_L_MASK &
+ * AFMT_60958_0__AFMT_60958_CS_CLOCK_ACCURACY_MASK
+ */
+ REG_UPDATE_2(AFMT_60958_0,
+ AFMT_60958_CS_CHANNEL_NUMBER_L, 1,
+ AFMT_60958_CS_CLOCK_ACCURACY, 0);
+
+ /* AFMT_60958_1 AFMT_60958_CS_CHALNNEL_NUMBER_R */
+ REG_UPDATE(AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
+
+ /* AFMT_60958_2 now keep this settings until
+ * Programming guide comes out
+ */
+ REG_UPDATE_6(AFMT_60958_2,
+ AFMT_60958_CS_CHANNEL_NUMBER_2, 3,
+ AFMT_60958_CS_CHANNEL_NUMBER_3, 4,
+ AFMT_60958_CS_CHANNEL_NUMBER_4, 5,
+ AFMT_60958_CS_CHANNEL_NUMBER_5, 6,
+ AFMT_60958_CS_CHANNEL_NUMBER_6, 7,
+ AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
+}
+
+static void enc1_se_setup_dp_audio(
+ struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ /* --- DP Audio packet configurations --- */
+
+ /* ATP Configuration */
+ REG_SET(DP_SEC_AUD_N, 0,
+ DP_SEC_AUD_N, DP_SEC_AUD_N__DP_SEC_AUD_N__DEFAULT);
+
+ /* Async/auto-calc timestamp mode */
+ REG_SET(DP_SEC_TIMESTAMP, 0, DP_SEC_TIMESTAMP_MODE,
+ DP_SEC_TIMESTAMP__DP_SEC_TIMESTAMP_MODE__AUTO_CALC);
+
+ /* --- The following are the registers
+ * copied from the SetupHDMI ---
+ */
+
+ /* AFMT_AUDIO_PACKET_CONTROL */
+ REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+
+ /* AFMT_AUDIO_PACKET_CONTROL2 */
+ /* Program the ATP and AIP next */
+ REG_UPDATE_2(AFMT_AUDIO_PACKET_CONTROL2,
+ AFMT_AUDIO_LAYOUT_OVRD, 0,
+ AFMT_60958_OSF_OVRD, 0);
+
+ /* AFMT_INFOFRAME_CONTROL0 */
+ REG_UPDATE(AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+
+ /* AFMT_60958_0__AFMT_60958_CS_CLOCK_ACCURACY_MASK */
+ REG_UPDATE(AFMT_60958_0, AFMT_60958_CS_CLOCK_ACCURACY, 0);
+}
+
+static void enc1_se_enable_audio_clock(
+ struct stream_encoder *enc,
+ bool enable)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (REG(AFMT_CNTL) == 0)
+ return; /* DCE8/10 does not have this register */
+
+ REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, !!enable);
+
+ /* wait for AFMT clock to turn on,
+ * expectation: this should complete in 1-2 reads
+ *
+ * REG_WAIT(AFMT_CNTL, AFMT_AUDIO_CLOCK_ON, !!enable, 1, 10);
+ *
+ * TODO: wait for clock_on does not work well. May need HW
+ * program sequence. But audio seems work normally even without wait
+ * for clock_on status change
+ */
+}
+
+static void enc1_se_enable_dp_audio(
+ struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ /* Enable Audio packets */
+ REG_UPDATE(DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1);
+
+ /* Program the ATP and AIP next */
+ REG_UPDATE_2(DP_SEC_CNTL,
+ DP_SEC_ATP_ENABLE, 1,
+ DP_SEC_AIP_ENABLE, 1);
+
+ /* Program STREAM_ENABLE after all the other enables. */
+ REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+static void enc1_se_disable_dp_audio(
+ struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t value = 0;
+
+ /* Disable Audio packets */
+ REG_UPDATE_5(DP_SEC_CNTL,
+ DP_SEC_ASP_ENABLE, 0,
+ DP_SEC_ATP_ENABLE, 0,
+ DP_SEC_AIP_ENABLE, 0,
+ DP_SEC_ACM_ENABLE, 0,
+ DP_SEC_STREAM_ENABLE, 0);
+
+ /* This register shared with encoder info frame. Therefore we need to
+ * keep master enabled if at least on of the fields is not 0
+ */
+ value = REG_READ(DP_SEC_CNTL);
+ if (value != 0)
+ REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+
+}
+
+void enc1_se_audio_mute_control(
+ struct stream_encoder *enc,
+ bool mute)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, !mute);
+}
+
+void enc1_se_dp_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info)
+{
+ enc1_se_audio_setup(enc, az_inst, info);
+}
+
+void enc1_se_dp_audio_enable(
+ struct stream_encoder *enc)
+{
+ enc1_se_enable_audio_clock(enc, true);
+ enc1_se_setup_dp_audio(enc);
+ enc1_se_enable_dp_audio(enc);
+}
+
+void enc1_se_dp_audio_disable(
+ struct stream_encoder *enc)
+{
+ enc1_se_disable_dp_audio(enc);
+ enc1_se_enable_audio_clock(enc, false);
+}
+
+void enc1_se_hdmi_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info,
+ struct audio_crtc_info *audio_crtc_info)
+{
+ enc1_se_enable_audio_clock(enc, true);
+ enc1_se_setup_hdmi_audio(enc, audio_crtc_info);
+ enc1_se_audio_setup(enc, az_inst, info);
+}
+
+void enc1_se_hdmi_audio_disable(
+ struct stream_encoder *enc)
+{
+ enc1_se_enable_audio_clock(enc, false);
+}
+
+
+void enc1_setup_stereo_sync(
+ struct stream_encoder *enc,
+ int tg_inst, bool enable)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ REG_UPDATE(DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, tg_inst);
+ REG_UPDATE(DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, !enable);
+}
+
+
+static const struct stream_encoder_funcs dcn10_str_enc_funcs = {
+ .dp_set_stream_attribute =
+ enc1_stream_encoder_dp_set_stream_attribute,
+ .hdmi_set_stream_attribute =
+ enc1_stream_encoder_hdmi_set_stream_attribute,
+ .dvi_set_stream_attribute =
+ enc1_stream_encoder_dvi_set_stream_attribute,
+ .set_mst_bandwidth =
+ enc1_stream_encoder_set_mst_bandwidth,
+ .update_hdmi_info_packets =
+ enc1_stream_encoder_update_hdmi_info_packets,
+ .stop_hdmi_info_packets =
+ enc1_stream_encoder_stop_hdmi_info_packets,
+ .update_dp_info_packets =
+ enc1_stream_encoder_update_dp_info_packets,
+ .stop_dp_info_packets =
+ enc1_stream_encoder_stop_dp_info_packets,
+ .dp_blank =
+ enc1_stream_encoder_dp_blank,
+ .dp_unblank =
+ enc1_stream_encoder_dp_unblank,
+ .audio_mute_control = enc1_se_audio_mute_control,
+
+ .dp_audio_setup = enc1_se_dp_audio_setup,
+ .dp_audio_enable = enc1_se_dp_audio_enable,
+ .dp_audio_disable = enc1_se_dp_audio_disable,
+
+ .hdmi_audio_setup = enc1_se_hdmi_audio_setup,
+ .hdmi_audio_disable = enc1_se_hdmi_audio_disable,
+ .setup_stereo_sync = enc1_setup_stereo_sync,
+ .set_avmute = enc1_stream_encoder_set_avmute,
+};
+
+void dcn10_stream_encoder_construct(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id,
+ const struct dcn10_stream_enc_registers *regs,
+ const struct dcn10_stream_encoder_shift *se_shift,
+ const struct dcn10_stream_encoder_mask *se_mask)
+{
+ enc1->base.funcs = &dcn10_str_enc_funcs;
+ enc1->base.ctx = ctx;
+ enc1->base.id = eng_id;
+ enc1->base.bp = bp;
+ enc1->regs = regs;
+ enc1->se_shift = se_shift;
+ enc1->se_mask = se_mask;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
new file mode 100644
index 000000000000..6b3e4ded155b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
@@ -0,0 +1,524 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_STREAM_ENCODER_DCN10_H__
+#define __DC_STREAM_ENCODER_DCN10_H__
+
+#include "stream_encoder.h"
+
+#define DCN10STRENC_FROM_STRENC(stream_encoder)\
+ container_of(stream_encoder, struct dcn10_stream_encoder, base)
+
+#define SE_COMMON_DCN_REG_LIST(id) \
+ SRI(AFMT_CNTL, DIG, id), \
+ SRI(AFMT_GENERIC_0, DIG, id), \
+ SRI(AFMT_GENERIC_1, DIG, id), \
+ SRI(AFMT_GENERIC_2, DIG, id), \
+ SRI(AFMT_GENERIC_3, DIG, id), \
+ SRI(AFMT_GENERIC_4, DIG, id), \
+ SRI(AFMT_GENERIC_5, DIG, id), \
+ SRI(AFMT_GENERIC_6, DIG, id), \
+ SRI(AFMT_GENERIC_7, DIG, id), \
+ SRI(AFMT_GENERIC_HDR, DIG, id), \
+ SRI(AFMT_INFOFRAME_CONTROL0, DIG, id), \
+ SRI(AFMT_VBI_PACKET_CONTROL, DIG, id), \
+ SRI(AFMT_VBI_PACKET_CONTROL1, DIG, id), \
+ SRI(AFMT_AUDIO_PACKET_CONTROL, DIG, id), \
+ SRI(AFMT_AUDIO_PACKET_CONTROL2, DIG, id), \
+ SRI(AFMT_AUDIO_SRC_CONTROL, DIG, id), \
+ SRI(AFMT_60958_0, DIG, id), \
+ SRI(AFMT_60958_1, DIG, id), \
+ SRI(AFMT_60958_2, DIG, id), \
+ SRI(DIG_FE_CNTL, DIG, id), \
+ SRI(HDMI_CONTROL, DIG, id), \
+ SRI(HDMI_DB_CONTROL, DIG, id), \
+ SRI(HDMI_GC, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \
+ SRI(HDMI_INFOFRAME_CONTROL0, DIG, id), \
+ SRI(HDMI_INFOFRAME_CONTROL1, DIG, id), \
+ SRI(HDMI_VBI_PACKET_CONTROL, DIG, id), \
+ SRI(HDMI_AUDIO_PACKET_CONTROL, DIG, id),\
+ SRI(HDMI_ACR_PACKET_CONTROL, DIG, id),\
+ SRI(HDMI_ACR_32_0, DIG, id),\
+ SRI(HDMI_ACR_32_1, DIG, id),\
+ SRI(HDMI_ACR_44_0, DIG, id),\
+ SRI(HDMI_ACR_44_1, DIG, id),\
+ SRI(HDMI_ACR_48_0, DIG, id),\
+ SRI(HDMI_ACR_48_1, DIG, id),\
+ SRI(DP_DB_CNTL, DP, id), \
+ SRI(DP_MSA_MISC, DP, id), \
+ SRI(DP_MSA_COLORIMETRY, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM1, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM2, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM3, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM4, DP, id), \
+ SRI(DP_MSE_RATE_CNTL, DP, id), \
+ SRI(DP_MSE_RATE_UPDATE, DP, id), \
+ SRI(DP_PIXEL_FORMAT, DP, id), \
+ SRI(DP_SEC_CNTL, DP, id), \
+ SRI(DP_STEER_FIFO, DP, id), \
+ SRI(DP_VID_M, DP, id), \
+ SRI(DP_VID_N, DP, id), \
+ SRI(DP_VID_STREAM_CNTL, DP, id), \
+ SRI(DP_VID_TIMING, DP, id), \
+ SRI(DP_SEC_AUD_N, DP, id), \
+ SRI(DP_SEC_TIMESTAMP, DP, id)
+
+#define SE_DCN_REG_LIST(id)\
+ SE_COMMON_DCN_REG_LIST(id)
+
+
+struct dcn10_stream_enc_registers {
+ uint32_t AFMT_CNTL;
+ uint32_t AFMT_AVI_INFO0;
+ uint32_t AFMT_AVI_INFO1;
+ uint32_t AFMT_AVI_INFO2;
+ uint32_t AFMT_AVI_INFO3;
+ uint32_t AFMT_GENERIC_0;
+ uint32_t AFMT_GENERIC_1;
+ uint32_t AFMT_GENERIC_2;
+ uint32_t AFMT_GENERIC_3;
+ uint32_t AFMT_GENERIC_4;
+ uint32_t AFMT_GENERIC_5;
+ uint32_t AFMT_GENERIC_6;
+ uint32_t AFMT_GENERIC_7;
+ uint32_t AFMT_GENERIC_HDR;
+ uint32_t AFMT_INFOFRAME_CONTROL0;
+ uint32_t AFMT_VBI_PACKET_CONTROL;
+ uint32_t AFMT_VBI_PACKET_CONTROL1;
+ uint32_t AFMT_AUDIO_PACKET_CONTROL;
+ uint32_t AFMT_AUDIO_PACKET_CONTROL2;
+ uint32_t AFMT_AUDIO_SRC_CONTROL;
+ uint32_t AFMT_60958_0;
+ uint32_t AFMT_60958_1;
+ uint32_t AFMT_60958_2;
+ uint32_t DIG_FE_CNTL;
+ uint32_t DP_MSE_RATE_CNTL;
+ uint32_t DP_MSE_RATE_UPDATE;
+ uint32_t DP_PIXEL_FORMAT;
+ uint32_t DP_SEC_CNTL;
+ uint32_t DP_STEER_FIFO;
+ uint32_t DP_VID_M;
+ uint32_t DP_VID_N;
+ uint32_t DP_VID_STREAM_CNTL;
+ uint32_t DP_VID_TIMING;
+ uint32_t DP_SEC_AUD_N;
+ uint32_t DP_SEC_TIMESTAMP;
+ uint32_t HDMI_CONTROL;
+ uint32_t HDMI_GC;
+ uint32_t HDMI_GENERIC_PACKET_CONTROL0;
+ uint32_t HDMI_GENERIC_PACKET_CONTROL1;
+ uint32_t HDMI_GENERIC_PACKET_CONTROL2;
+ uint32_t HDMI_GENERIC_PACKET_CONTROL3;
+ uint32_t HDMI_GENERIC_PACKET_CONTROL4;
+ uint32_t HDMI_GENERIC_PACKET_CONTROL5;
+ uint32_t HDMI_INFOFRAME_CONTROL0;
+ uint32_t HDMI_INFOFRAME_CONTROL1;
+ uint32_t HDMI_VBI_PACKET_CONTROL;
+ uint32_t HDMI_AUDIO_PACKET_CONTROL;
+ uint32_t HDMI_ACR_PACKET_CONTROL;
+ uint32_t HDMI_ACR_32_0;
+ uint32_t HDMI_ACR_32_1;
+ uint32_t HDMI_ACR_44_0;
+ uint32_t HDMI_ACR_44_1;
+ uint32_t HDMI_ACR_48_0;
+ uint32_t HDMI_ACR_48_1;
+ uint32_t DP_DB_CNTL;
+ uint32_t DP_MSA_MISC;
+ uint32_t DP_MSA_COLORIMETRY;
+ uint32_t DP_MSA_TIMING_PARAM1;
+ uint32_t DP_MSA_TIMING_PARAM2;
+ uint32_t DP_MSA_TIMING_PARAM3;
+ uint32_t DP_MSA_TIMING_PARAM4;
+ uint32_t HDMI_DB_CONTROL;
+};
+
+
+#define SE_SF(reg_name, field_name, post_fix)\
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+#define SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh)\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, mask_sh),\
+ SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB0, mask_sh),\
+ SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB1, mask_sh),\
+ SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB2, mask_sh),\
+ SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB3, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DATA_SCRAMBLE_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
+ SE_SF(DIG0_AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_X, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_Y, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP1_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\
+ SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_RESET, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_M_N_GEN_EN, mask_sh),\
+ SE_SF(DP0_DP_VID_N, DP_VID_N, mask_sh),\
+ SE_SF(DP0_DP_VID_M, DP_VID_M, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_START, mask_sh),\
+ SE_SF(DIG0_AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, mask_sh),\
+ SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, mask_sh),\
+ SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, mask_sh),\
+ SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_LAYOUT_OVRD, mask_sh),\
+ SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, AFMT_60958_OSF_OVRD, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUDIO_PRIORITY, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_0, HDMI_ACR_CTS_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_1, HDMI_ACR_N_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_0, HDMI_ACR_CTS_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_1, HDMI_ACR_N_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\
+ SE_SF(DIG0_AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, mask_sh),\
+ SE_SF(DIG0_AFMT_60958_0, AFMT_60958_CS_CLOCK_ACCURACY, mask_sh),\
+ SE_SF(DIG0_AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, mask_sh),\
+ SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, mask_sh),\
+ SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, mask_sh),\
+ SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, mask_sh),\
+ SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, mask_sh),\
+ SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, mask_sh),\
+ SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, mask_sh),\
+ SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\
+ SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\
+ SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, mask_sh),\
+ SE_SF(DIG0_AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_CLOCK_CHANNEL_RATE, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, TMDS_PIXEL_ENCODING, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, TMDS_COLOR_FORMAT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_LOCK_STATUS, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC0_FRAME_UPDATE_PENDING, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC1_FRAME_UPDATE_PENDING, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE_PENDING, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE_PENDING, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE_PENDING, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE_PENDING, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE_PENDING, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE_PENDING, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC0_FRAME_UPDATE, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC1_FRAME_UPDATE, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_HTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_VTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_HSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_VSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\
+ SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_N_MUL, mask_sh)
+
+#define SE_COMMON_MASK_SH_LIST_SOC(mask_sh)\
+ SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh)
+
+#define SE_COMMON_MASK_SH_LIST_DCN10(mask_sh)\
+ SE_COMMON_MASK_SH_LIST_SOC(mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_LINE, mask_sh)
+
+
+#define SE_REG_FIELD_LIST_DCN1_0(type) \
+ type AFMT_GENERIC_INDEX;\
+ type AFMT_GENERIC_HB0;\
+ type AFMT_GENERIC_HB1;\
+ type AFMT_GENERIC_HB2;\
+ type AFMT_GENERIC_HB3;\
+ type AFMT_GENERIC_LOCK_STATUS;\
+ type AFMT_GENERIC_CONFLICT;\
+ type AFMT_GENERIC_CONFLICT_CLR;\
+ type AFMT_GENERIC0_FRAME_UPDATE_PENDING;\
+ type AFMT_GENERIC1_FRAME_UPDATE_PENDING;\
+ type AFMT_GENERIC2_FRAME_UPDATE_PENDING;\
+ type AFMT_GENERIC3_FRAME_UPDATE_PENDING;\
+ type AFMT_GENERIC4_FRAME_UPDATE_PENDING;\
+ type AFMT_GENERIC5_FRAME_UPDATE_PENDING;\
+ type AFMT_GENERIC6_FRAME_UPDATE_PENDING;\
+ type AFMT_GENERIC7_FRAME_UPDATE_PENDING;\
+ type AFMT_GENERIC0_FRAME_UPDATE;\
+ type AFMT_GENERIC1_FRAME_UPDATE;\
+ type AFMT_GENERIC2_FRAME_UPDATE;\
+ type AFMT_GENERIC3_FRAME_UPDATE;\
+ type AFMT_GENERIC4_FRAME_UPDATE;\
+ type AFMT_GENERIC5_FRAME_UPDATE;\
+ type AFMT_GENERIC6_FRAME_UPDATE;\
+ type AFMT_GENERIC7_FRAME_UPDATE;\
+ type HDMI_GENERIC0_CONT;\
+ type HDMI_GENERIC0_SEND;\
+ type HDMI_GENERIC0_LINE;\
+ type HDMI_GENERIC1_CONT;\
+ type HDMI_GENERIC1_SEND;\
+ type HDMI_GENERIC1_LINE;\
+ type HDMI_GENERIC2_CONT;\
+ type HDMI_GENERIC2_SEND;\
+ type HDMI_GENERIC2_LINE;\
+ type HDMI_GENERIC3_CONT;\
+ type HDMI_GENERIC3_SEND;\
+ type HDMI_GENERIC3_LINE;\
+ type HDMI_GENERIC4_CONT;\
+ type HDMI_GENERIC4_SEND;\
+ type HDMI_GENERIC4_LINE;\
+ type HDMI_GENERIC5_CONT;\
+ type HDMI_GENERIC5_SEND;\
+ type HDMI_GENERIC5_LINE;\
+ type HDMI_GENERIC6_CONT;\
+ type HDMI_GENERIC6_SEND;\
+ type HDMI_GENERIC6_LINE;\
+ type HDMI_GENERIC7_CONT;\
+ type HDMI_GENERIC7_SEND;\
+ type HDMI_GENERIC7_LINE;\
+ type DP_PIXEL_ENCODING;\
+ type DP_COMPONENT_DEPTH;\
+ type HDMI_PACKET_GEN_VERSION;\
+ type HDMI_KEEPOUT_MODE;\
+ type HDMI_DEEP_COLOR_ENABLE;\
+ type HDMI_CLOCK_CHANNEL_RATE;\
+ type HDMI_DEEP_COLOR_DEPTH;\
+ type HDMI_GC_CONT;\
+ type HDMI_GC_SEND;\
+ type HDMI_NULL_SEND;\
+ type HDMI_DATA_SCRAMBLE_EN;\
+ type HDMI_AUDIO_INFO_SEND;\
+ type AFMT_AUDIO_INFO_UPDATE;\
+ type HDMI_AUDIO_INFO_LINE;\
+ type HDMI_GC_AVMUTE;\
+ type DP_MSE_RATE_X;\
+ type DP_MSE_RATE_Y;\
+ type DP_MSE_RATE_UPDATE_PENDING;\
+ type DP_SEC_GSP0_ENABLE;\
+ type DP_SEC_STREAM_ENABLE;\
+ type DP_SEC_GSP1_ENABLE;\
+ type DP_SEC_GSP2_ENABLE;\
+ type DP_SEC_GSP3_ENABLE;\
+ type DP_SEC_GSP4_ENABLE;\
+ type DP_SEC_GSP5_ENABLE;\
+ type DP_SEC_GSP6_ENABLE;\
+ type DP_SEC_GSP7_ENABLE;\
+ type DP_SEC_MPG_ENABLE;\
+ type DP_VID_STREAM_DIS_DEFER;\
+ type DP_VID_STREAM_ENABLE;\
+ type DP_VID_STREAM_STATUS;\
+ type DP_STEER_FIFO_RESET;\
+ type DP_VID_M_N_GEN_EN;\
+ type DP_VID_N;\
+ type DP_VID_M;\
+ type DIG_START;\
+ type AFMT_AUDIO_SRC_SELECT;\
+ type AFMT_AUDIO_CHANNEL_ENABLE;\
+ type HDMI_AUDIO_PACKETS_PER_LINE;\
+ type HDMI_AUDIO_DELAY_EN;\
+ type AFMT_60958_CS_UPDATE;\
+ type AFMT_AUDIO_LAYOUT_OVRD;\
+ type AFMT_60958_OSF_OVRD;\
+ type HDMI_ACR_AUTO_SEND;\
+ type HDMI_ACR_SOURCE;\
+ type HDMI_ACR_AUDIO_PRIORITY;\
+ type HDMI_ACR_CTS_32;\
+ type HDMI_ACR_N_32;\
+ type HDMI_ACR_CTS_44;\
+ type HDMI_ACR_N_44;\
+ type HDMI_ACR_CTS_48;\
+ type HDMI_ACR_N_48;\
+ type AFMT_60958_CS_CHANNEL_NUMBER_L;\
+ type AFMT_60958_CS_CLOCK_ACCURACY;\
+ type AFMT_60958_CS_CHANNEL_NUMBER_R;\
+ type AFMT_60958_CS_CHANNEL_NUMBER_2;\
+ type AFMT_60958_CS_CHANNEL_NUMBER_3;\
+ type AFMT_60958_CS_CHANNEL_NUMBER_4;\
+ type AFMT_60958_CS_CHANNEL_NUMBER_5;\
+ type AFMT_60958_CS_CHANNEL_NUMBER_6;\
+ type AFMT_60958_CS_CHANNEL_NUMBER_7;\
+ type DP_SEC_AUD_N;\
+ type DP_SEC_TIMESTAMP_MODE;\
+ type DP_SEC_ASP_ENABLE;\
+ type DP_SEC_ATP_ENABLE;\
+ type DP_SEC_AIP_ENABLE;\
+ type DP_SEC_ACM_ENABLE;\
+ type AFMT_AUDIO_SAMPLE_SEND;\
+ type AFMT_AUDIO_CLOCK_EN;\
+ type TMDS_PIXEL_ENCODING;\
+ type TMDS_COLOR_FORMAT;\
+ type DIG_STEREOSYNC_SELECT;\
+ type DIG_STEREOSYNC_GATE_EN;\
+ type DP_DB_DISABLE;\
+ type DP_MSA_MISC0;\
+ type DP_MSA_HTOTAL;\
+ type DP_MSA_VTOTAL;\
+ type DP_MSA_HSTART;\
+ type DP_MSA_VSTART;\
+ type DP_MSA_HSYNCWIDTH;\
+ type DP_MSA_HSYNCPOLARITY;\
+ type DP_MSA_VSYNCWIDTH;\
+ type DP_MSA_VSYNCPOLARITY;\
+ type DP_MSA_HWIDTH;\
+ type DP_MSA_VHEIGHT;\
+ type HDMI_DB_DISABLE;\
+ type DP_VID_N_MUL;\
+ type DP_VID_M_DOUBLE_VALUE_EN
+
+struct dcn10_stream_encoder_shift {
+ SE_REG_FIELD_LIST_DCN1_0(uint8_t);
+};
+
+struct dcn10_stream_encoder_mask {
+ SE_REG_FIELD_LIST_DCN1_0(uint32_t);
+};
+
+struct dcn10_stream_encoder {
+ struct stream_encoder base;
+ const struct dcn10_stream_enc_registers *regs;
+ const struct dcn10_stream_encoder_shift *se_shift;
+ const struct dcn10_stream_encoder_mask *se_mask;
+};
+
+void dcn10_stream_encoder_construct(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id,
+ const struct dcn10_stream_enc_registers *regs,
+ const struct dcn10_stream_encoder_shift *se_shift,
+ const struct dcn10_stream_encoder_mask *se_mask);
+
+void enc1_update_generic_info_packet(
+ struct dcn10_stream_encoder *enc1,
+ uint32_t packet_index,
+ const struct dc_info_packet *info_packet);
+
+void enc1_stream_encoder_dp_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ enum dc_color_space output_color_space);
+
+void enc1_stream_encoder_hdmi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ int actual_pix_clk_khz,
+ bool enable_audio);
+
+void enc1_stream_encoder_dvi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ bool is_dual_link);
+
+void enc1_stream_encoder_set_mst_bandwidth(
+ struct stream_encoder *enc,
+ struct fixed31_32 avg_time_slots_per_mtp);
+
+void enc1_stream_encoder_update_dp_info_packets(
+ struct stream_encoder *enc,
+ const struct encoder_info_frame *info_frame);
+
+void enc1_stream_encoder_stop_dp_info_packets(
+ struct stream_encoder *enc);
+
+void enc1_stream_encoder_dp_blank(
+ struct stream_encoder *enc);
+
+void enc1_stream_encoder_dp_unblank(
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param);
+
+void enc1_setup_stereo_sync(
+ struct stream_encoder *enc,
+ int tg_inst, bool enable);
+
+void enc1_stream_encoder_set_avmute(
+ struct stream_encoder *enc,
+ bool enable);
+
+void enc1_se_audio_mute_control(
+ struct stream_encoder *enc,
+ bool mute);
+
+void enc1_se_dp_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info);
+
+void enc1_se_dp_audio_enable(
+ struct stream_encoder *enc);
+
+void enc1_se_dp_audio_disable(
+ struct stream_encoder *enc);
+
+void enc1_se_hdmi_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info,
+ struct audio_crtc_info *audio_crtc_info);
+
+void enc1_se_hdmi_audio_disable(
+ struct stream_encoder *enc);
+
+#endif /* __DC_STREAM_ENCODER_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h
index 22e7ee7dcd26..4ff9b2bba178 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services.h
@@ -341,6 +341,10 @@ bool dm_dmcu_set_pipe(struct dc_context *ctx, unsigned int controller_id);
unsigned long long dm_get_timestamp(struct dc_context *ctx);
+unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx,
+ unsigned long long current_time_stamp,
+ unsigned long long last_time_stamp);
+
/*
* performance tracing
*/
@@ -351,10 +355,6 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line);
/*
* Debug and verification hooks
*/
-bool dm_helpers_dc_conn_log(
- struct dc_context *ctx,
- struct log_entry *entry,
- enum dc_log_type event);
void dm_dtn_log_begin(struct dc_context *ctx);
void dm_dtn_log_append_v(struct dc_context *ctx, const char *msg, ...);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
index b1ad3553f900..47c19f8fe7d1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
@@ -108,4 +108,17 @@ enum output_standard {
dm_std_uninitialized = 0, dm_std_cvtr2, dm_std_cvt
};
+enum mpc_combine_affinity {
+ dm_mpc_always_when_possible,
+ dm_mpc_reduce_voltage,
+ dm_mpc_reduce_voltage_and_clocks
+};
+
+enum self_refresh_affinity {
+ dm_try_to_allow_self_refresh_and_mclk_switch,
+ dm_allow_self_refresh_and_mclk_switch,
+ dm_allow_self_refresh,
+ dm_neither_self_refresh_nor_mclk_switch
+};
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
index c109b2c34c8f..fd9d97aab071 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
@@ -26,75 +26,89 @@
#include "display_mode_lib.h"
#include "dc_features.h"
+static const struct _vcs_dpi_ip_params_st dcn1_0_ip = {
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = 164,
+ .dpte_buffer_size_in_pte_reqs = 42,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .pte_chunk_size_kbytes = 2,
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 2,
+ .line_buffer_size_bits = 589824,
+ .max_line_buffer_lines = 12,
+ .IsLineBufferBppFixed = 0,
+ .LineBufferFixedBpp = -1,
+ .writeback_luma_buffer_size_kbytes = 12,
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .max_num_dpp = 4,
+ .max_num_wb = 2,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 4,
+ .max_vscl_ratio = 4,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.10,
+ .min_vblank_lines = 14,
+ .dppclk_delay_subtotal = 90,
+ .dispclk_delay_subtotal = 42,
+ .dcfclk_cstate_latency = 10,
+ .max_inter_dcn_tile_repeaters = 8,
+ .can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one = 0,
+ .bug_forcing_LC_req_same_size_fixed = 0,
+};
+
+static const struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .urgent_latency_us = 4.0,
+ .writeback_latency_us = 12.0,
+ .ideal_dram_bw_after_urgent_percent = 80.0,
+ .max_request_size_bytes = 256,
+ .downspread_percent = 0.5,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 128,
+ .urgent_out_of_order_return_per_channel_bytes = 256,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 2,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 17.0,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+};
+
static void set_soc_bounding_box(struct _vcs_dpi_soc_bounding_box_st *soc, enum dml_project project)
{
- if (project == DML_PROJECT_RAVEN1) {
- soc->sr_exit_time_us = 9.0;
- soc->sr_enter_plus_exit_time_us = 11.0;
- soc->urgent_latency_us = 4.0;
- soc->writeback_latency_us = 12.0;
- soc->ideal_dram_bw_after_urgent_percent = 80.0;
- soc->max_request_size_bytes = 256;
- soc->downspread_percent = 0.5;
- soc->dram_page_open_time_ns = 50.0;
- soc->dram_rw_turnaround_time_ns = 17.5;
- soc->dram_return_buffer_per_channel_bytes = 8192;
- soc->round_trip_ping_latency_dcfclk_cycles = 128;
- soc->urgent_out_of_order_return_per_channel_bytes = 256;
- soc->channel_interleave_bytes = 256;
- soc->num_banks = 8;
- soc->num_chans = 2;
- soc->vmm_page_size_bytes = 4096;
- soc->dram_clock_change_latency_us = 17.0;
- soc->writeback_dram_clock_change_latency_us = 23.0;
- soc->return_bus_width_bytes = 64;
- } else {
- BREAK_TO_DEBUGGER(); /* Invalid Project Specified */
+ switch (project) {
+ case DML_PROJECT_RAVEN1:
+ *soc = dcn1_0_soc;
+ break;
+ default:
+ ASSERT(0);
+ break;
}
}
static void set_ip_params(struct _vcs_dpi_ip_params_st *ip, enum dml_project project)
{
- if (project == DML_PROJECT_RAVEN1) {
- ip->rob_buffer_size_kbytes = 64;
- ip->det_buffer_size_kbytes = 164;
- ip->dpte_buffer_size_in_pte_reqs = 42;
- ip->dpp_output_buffer_pixels = 2560;
- ip->opp_output_buffer_lines = 1;
- ip->pixel_chunk_size_kbytes = 8;
- ip->pte_enable = 1;
- ip->pte_chunk_size_kbytes = 2;
- ip->meta_chunk_size_kbytes = 2;
- ip->writeback_chunk_size_kbytes = 2;
- ip->line_buffer_size_bits = 589824;
- ip->max_line_buffer_lines = 12;
- ip->IsLineBufferBppFixed = 0;
- ip->LineBufferFixedBpp = -1;
- ip->writeback_luma_buffer_size_kbytes = 12;
- ip->writeback_chroma_buffer_size_kbytes = 8;
- ip->max_num_dpp = 4;
- ip->max_num_wb = 2;
- ip->max_dchub_pscl_bw_pix_per_clk = 4;
- ip->max_pscl_lb_bw_pix_per_clk = 2;
- ip->max_lb_vscl_bw_pix_per_clk = 4;
- ip->max_vscl_hscl_bw_pix_per_clk = 4;
- ip->max_hscl_ratio = 4;
- ip->max_vscl_ratio = 4;
- ip->hscl_mults = 4;
- ip->vscl_mults = 4;
- ip->max_hscl_taps = 8;
- ip->max_vscl_taps = 8;
- ip->dispclk_ramp_margin_percent = 1;
- ip->underscan_factor = 1.10;
- ip->min_vblank_lines = 14;
- ip->dppclk_delay_subtotal = 90;
- ip->dispclk_delay_subtotal = 42;
- ip->dcfclk_cstate_latency = 10;
- ip->max_inter_dcn_tile_repeaters = 8;
- ip->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one = 0;
- ip->bug_forcing_LC_req_same_size_fixed = 0;
- } else {
- BREAK_TO_DEBUGGER(); /* Invalid Project Specified */
+ switch (project) {
+ case DML_PROJECT_RAVEN1:
+ *ip = dcn1_0_ip;
+ break;
+ default:
+ ASSERT(0);
+ break;
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index 09affa16cc43..7fa0375939ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -25,39 +25,39 @@
#ifndef __DISPLAY_MODE_STRUCTS_H__
#define __DISPLAY_MODE_STRUCTS_H__
-typedef struct _vcs_dpi_voltage_scaling_st voltage_scaling_st;
-typedef struct _vcs_dpi_soc_bounding_box_st soc_bounding_box_st;
-typedef struct _vcs_dpi_ip_params_st ip_params_st;
-typedef struct _vcs_dpi_display_pipe_source_params_st display_pipe_source_params_st;
-typedef struct _vcs_dpi_display_output_params_st display_output_params_st;
-typedef struct _vcs_dpi_display_bandwidth_st display_bandwidth_st;
-typedef struct _vcs_dpi_scaler_ratio_depth_st scaler_ratio_depth_st;
-typedef struct _vcs_dpi_scaler_taps_st scaler_taps_st;
-typedef struct _vcs_dpi_display_pipe_dest_params_st display_pipe_dest_params_st;
-typedef struct _vcs_dpi_display_pipe_params_st display_pipe_params_st;
-typedef struct _vcs_dpi_display_clocks_and_cfg_st display_clocks_and_cfg_st;
-typedef struct _vcs_dpi_display_e2e_pipe_params_st display_e2e_pipe_params_st;
-typedef struct _vcs_dpi_dchub_buffer_sizing_st dchub_buffer_sizing_st;
-typedef struct _vcs_dpi_watermarks_perf_st watermarks_perf_st;
-typedef struct _vcs_dpi_cstate_pstate_watermarks_st cstate_pstate_watermarks_st;
-typedef struct _vcs_dpi_wm_calc_pipe_params_st wm_calc_pipe_params_st;
-typedef struct _vcs_dpi_vratio_pre_st vratio_pre_st;
-typedef struct _vcs_dpi_display_data_rq_misc_params_st display_data_rq_misc_params_st;
-typedef struct _vcs_dpi_display_data_rq_sizing_params_st display_data_rq_sizing_params_st;
-typedef struct _vcs_dpi_display_data_rq_dlg_params_st display_data_rq_dlg_params_st;
-typedef struct _vcs_dpi_display_cur_rq_dlg_params_st display_cur_rq_dlg_params_st;
-typedef struct _vcs_dpi_display_rq_dlg_params_st display_rq_dlg_params_st;
-typedef struct _vcs_dpi_display_rq_sizing_params_st display_rq_sizing_params_st;
-typedef struct _vcs_dpi_display_rq_misc_params_st display_rq_misc_params_st;
-typedef struct _vcs_dpi_display_rq_params_st display_rq_params_st;
-typedef struct _vcs_dpi_display_dlg_regs_st display_dlg_regs_st;
-typedef struct _vcs_dpi_display_ttu_regs_st display_ttu_regs_st;
-typedef struct _vcs_dpi_display_data_rq_regs_st display_data_rq_regs_st;
-typedef struct _vcs_dpi_display_rq_regs_st display_rq_regs_st;
-typedef struct _vcs_dpi_display_dlg_sys_params_st display_dlg_sys_params_st;
-typedef struct _vcs_dpi_display_dlg_prefetch_param_st display_dlg_prefetch_param_st;
-typedef struct _vcs_dpi_display_pipe_clock_st display_pipe_clock_st;
-typedef struct _vcs_dpi_display_arb_params_st display_arb_params_st;
+typedef struct _vcs_dpi_voltage_scaling_st voltage_scaling_st;
+typedef struct _vcs_dpi_soc_bounding_box_st soc_bounding_box_st;
+typedef struct _vcs_dpi_ip_params_st ip_params_st;
+typedef struct _vcs_dpi_display_pipe_source_params_st display_pipe_source_params_st;
+typedef struct _vcs_dpi_display_output_params_st display_output_params_st;
+typedef struct _vcs_dpi_display_bandwidth_st display_bandwidth_st;
+typedef struct _vcs_dpi_scaler_ratio_depth_st scaler_ratio_depth_st;
+typedef struct _vcs_dpi_scaler_taps_st scaler_taps_st;
+typedef struct _vcs_dpi_display_pipe_dest_params_st display_pipe_dest_params_st;
+typedef struct _vcs_dpi_display_pipe_params_st display_pipe_params_st;
+typedef struct _vcs_dpi_display_clocks_and_cfg_st display_clocks_and_cfg_st;
+typedef struct _vcs_dpi_display_e2e_pipe_params_st display_e2e_pipe_params_st;
+typedef struct _vcs_dpi_dchub_buffer_sizing_st dchub_buffer_sizing_st;
+typedef struct _vcs_dpi_watermarks_perf_st watermarks_perf_st;
+typedef struct _vcs_dpi_cstate_pstate_watermarks_st cstate_pstate_watermarks_st;
+typedef struct _vcs_dpi_wm_calc_pipe_params_st wm_calc_pipe_params_st;
+typedef struct _vcs_dpi_vratio_pre_st vratio_pre_st;
+typedef struct _vcs_dpi_display_data_rq_misc_params_st display_data_rq_misc_params_st;
+typedef struct _vcs_dpi_display_data_rq_sizing_params_st display_data_rq_sizing_params_st;
+typedef struct _vcs_dpi_display_data_rq_dlg_params_st display_data_rq_dlg_params_st;
+typedef struct _vcs_dpi_display_cur_rq_dlg_params_st display_cur_rq_dlg_params_st;
+typedef struct _vcs_dpi_display_rq_dlg_params_st display_rq_dlg_params_st;
+typedef struct _vcs_dpi_display_rq_sizing_params_st display_rq_sizing_params_st;
+typedef struct _vcs_dpi_display_rq_misc_params_st display_rq_misc_params_st;
+typedef struct _vcs_dpi_display_rq_params_st display_rq_params_st;
+typedef struct _vcs_dpi_display_dlg_regs_st display_dlg_regs_st;
+typedef struct _vcs_dpi_display_ttu_regs_st display_ttu_regs_st;
+typedef struct _vcs_dpi_display_data_rq_regs_st display_data_rq_regs_st;
+typedef struct _vcs_dpi_display_rq_regs_st display_rq_regs_st;
+typedef struct _vcs_dpi_display_dlg_sys_params_st display_dlg_sys_params_st;
+typedef struct _vcs_dpi_display_dlg_prefetch_param_st display_dlg_prefetch_param_st;
+typedef struct _vcs_dpi_display_pipe_clock_st display_pipe_clock_st;
+typedef struct _vcs_dpi_display_arb_params_st display_arb_params_st;
struct _vcs_dpi_voltage_scaling_st {
int state;
@@ -72,89 +72,107 @@ struct _vcs_dpi_voltage_scaling_st {
double dppclk_mhz;
};
-struct _vcs_dpi_soc_bounding_box_st {
- double sr_exit_time_us;
- double sr_enter_plus_exit_time_us;
- double urgent_latency_us;
- double writeback_latency_us;
- double ideal_dram_bw_after_urgent_percent;
- unsigned int max_request_size_bytes;
- double downspread_percent;
- double dram_page_open_time_ns;
- double dram_rw_turnaround_time_ns;
- double dram_return_buffer_per_channel_bytes;
- double dram_channel_width_bytes;
+struct _vcs_dpi_soc_bounding_box_st {
+ double sr_exit_time_us;
+ double sr_enter_plus_exit_time_us;
+ double urgent_latency_us;
+ double urgent_latency_pixel_data_only_us;
+ double urgent_latency_pixel_mixed_with_vm_data_us;
+ double urgent_latency_vm_data_only_us;
+ double writeback_latency_us;
+ double ideal_dram_bw_after_urgent_percent;
+ double pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
+ double pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm;
+ double pct_ideal_dram_sdp_bw_after_urgent_vm_only;
+ double max_avg_sdp_bw_use_normal_percent;
+ double max_avg_dram_bw_use_normal_percent;
+ unsigned int max_request_size_bytes;
+ double downspread_percent;
+ double dram_page_open_time_ns;
+ double dram_rw_turnaround_time_ns;
+ double dram_return_buffer_per_channel_bytes;
+ double dram_channel_width_bytes;
double fabric_datapath_to_dcn_data_return_bytes;
double dcn_downspread_percent;
double dispclk_dppclk_vco_speed_mhz;
double dfs_vco_period_ps;
- unsigned int round_trip_ping_latency_dcfclk_cycles;
- unsigned int urgent_out_of_order_return_per_channel_bytes;
- unsigned int channel_interleave_bytes;
- unsigned int num_banks;
- unsigned int num_chans;
- unsigned int vmm_page_size_bytes;
- double dram_clock_change_latency_us;
- double writeback_dram_clock_change_latency_us;
- unsigned int return_bus_width_bytes;
- unsigned int voltage_override;
- double xfc_bus_transport_time_us;
- double xfc_xbuf_latency_tolerance_us;
+ unsigned int urgent_out_of_order_return_per_channel_pixel_only_bytes;
+ unsigned int urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+ unsigned int urgent_out_of_order_return_per_channel_vm_only_bytes;
+ unsigned int round_trip_ping_latency_dcfclk_cycles;
+ unsigned int urgent_out_of_order_return_per_channel_bytes;
+ unsigned int channel_interleave_bytes;
+ unsigned int num_banks;
+ unsigned int num_chans;
+ unsigned int vmm_page_size_bytes;
+ double dram_clock_change_latency_us;
+ double writeback_dram_clock_change_latency_us;
+ unsigned int return_bus_width_bytes;
+ unsigned int voltage_override;
+ double xfc_bus_transport_time_us;
+ double xfc_xbuf_latency_tolerance_us;
+ int use_urgent_burst_bw;
struct _vcs_dpi_voltage_scaling_st clock_limits[7];
};
-struct _vcs_dpi_ip_params_st {
- unsigned int max_inter_dcn_tile_repeaters;
- unsigned int num_dsc;
- unsigned int odm_capable;
- unsigned int rob_buffer_size_kbytes;
- unsigned int det_buffer_size_kbytes;
- unsigned int dpte_buffer_size_in_pte_reqs;
- unsigned int pde_proc_buffer_size_64k_reqs;
- unsigned int dpp_output_buffer_pixels;
- unsigned int opp_output_buffer_lines;
- unsigned int pixel_chunk_size_kbytes;
- unsigned char pte_enable;
- unsigned int pte_chunk_size_kbytes;
- unsigned int meta_chunk_size_kbytes;
- unsigned int writeback_chunk_size_kbytes;
- unsigned int line_buffer_size_bits;
- unsigned int max_line_buffer_lines;
- unsigned int writeback_luma_buffer_size_kbytes;
- unsigned int writeback_chroma_buffer_size_kbytes;
- unsigned int writeback_chroma_line_buffer_width_pixels;
- unsigned int max_page_table_levels;
- unsigned int max_num_dpp;
- unsigned int max_num_otg;
- unsigned int cursor_chunk_size;
- unsigned int cursor_buffer_size;
- unsigned int max_num_wb;
- unsigned int max_dchub_pscl_bw_pix_per_clk;
- unsigned int max_pscl_lb_bw_pix_per_clk;
- unsigned int max_lb_vscl_bw_pix_per_clk;
- unsigned int max_vscl_hscl_bw_pix_per_clk;
- double max_hscl_ratio;
- double max_vscl_ratio;
- unsigned int hscl_mults;
- unsigned int vscl_mults;
- unsigned int max_hscl_taps;
- unsigned int max_vscl_taps;
- unsigned int xfc_supported;
- unsigned int xfc_fill_constant_bytes;
- double dispclk_ramp_margin_percent;
- double xfc_fill_bw_overhead_percent;
- double underscan_factor;
- unsigned int min_vblank_lines;
- unsigned int dppclk_delay_subtotal;
- unsigned int dispclk_delay_subtotal;
- unsigned int dcfclk_cstate_latency;
- unsigned int dppclk_delay_scl;
- unsigned int dppclk_delay_scl_lb_only;
- unsigned int dppclk_delay_cnvc_formatter;
- unsigned int dppclk_delay_cnvc_cursor;
- unsigned int is_line_buffer_bpp_fixed;
- unsigned int line_buffer_fixed_bpp;
- unsigned int dcc_supported;
+struct _vcs_dpi_ip_params_st {
+ bool gpuvm_enable;
+ bool hostvm_enable;
+ unsigned int gpuvm_max_page_table_levels;
+ unsigned int hostvm_max_page_table_levels;
+ unsigned int hostvm_cached_page_table_levels;
+ unsigned int pte_group_size_bytes;
+ unsigned int max_inter_dcn_tile_repeaters;
+ unsigned int num_dsc;
+ unsigned int odm_capable;
+ unsigned int rob_buffer_size_kbytes;
+ unsigned int det_buffer_size_kbytes;
+ unsigned int dpte_buffer_size_in_pte_reqs;
+ unsigned int pde_proc_buffer_size_64k_reqs;
+ unsigned int dpp_output_buffer_pixels;
+ unsigned int opp_output_buffer_lines;
+ unsigned int pixel_chunk_size_kbytes;
+ unsigned char pte_enable;
+ unsigned int pte_chunk_size_kbytes;
+ unsigned int meta_chunk_size_kbytes;
+ unsigned int writeback_chunk_size_kbytes;
+ unsigned int line_buffer_size_bits;
+ unsigned int max_line_buffer_lines;
+ unsigned int writeback_luma_buffer_size_kbytes;
+ unsigned int writeback_chroma_buffer_size_kbytes;
+ unsigned int writeback_chroma_line_buffer_width_pixels;
+ unsigned int max_page_table_levels;
+ unsigned int max_num_dpp;
+ unsigned int max_num_otg;
+ unsigned int cursor_chunk_size;
+ unsigned int cursor_buffer_size;
+ unsigned int max_num_wb;
+ unsigned int max_dchub_pscl_bw_pix_per_clk;
+ unsigned int max_pscl_lb_bw_pix_per_clk;
+ unsigned int max_lb_vscl_bw_pix_per_clk;
+ unsigned int max_vscl_hscl_bw_pix_per_clk;
+ double max_hscl_ratio;
+ double max_vscl_ratio;
+ unsigned int hscl_mults;
+ unsigned int vscl_mults;
+ unsigned int max_hscl_taps;
+ unsigned int max_vscl_taps;
+ unsigned int xfc_supported;
+ unsigned int xfc_fill_constant_bytes;
+ double dispclk_ramp_margin_percent;
+ double xfc_fill_bw_overhead_percent;
+ double underscan_factor;
+ unsigned int min_vblank_lines;
+ unsigned int dppclk_delay_subtotal;
+ unsigned int dispclk_delay_subtotal;
+ unsigned int dcfclk_cstate_latency;
+ unsigned int dppclk_delay_scl;
+ unsigned int dppclk_delay_scl_lb_only;
+ unsigned int dppclk_delay_cnvc_formatter;
+ unsigned int dppclk_delay_cnvc_cursor;
+ unsigned int is_line_buffer_bpp_fixed;
+ unsigned int line_buffer_fixed_bpp;
+ unsigned int dcc_supported;
unsigned int IsLineBufferBppFixed;
unsigned int LineBufferFixedBpp;
@@ -169,41 +187,45 @@ struct _vcs_dpi_display_xfc_params_st {
int xfc_slv_chunk_size_bytes;
};
-struct _vcs_dpi_display_pipe_source_params_st {
- int source_format;
- unsigned char dcc;
- unsigned int dcc_override;
- unsigned int dcc_rate;
- unsigned char dcc_use_global;
- unsigned char vm;
- unsigned char vm_levels_force_en;
- unsigned int vm_levels_force;
- int source_scan;
- int sw_mode;
- int macro_tile_size;
- unsigned char is_display_sw;
- unsigned int viewport_width;
- unsigned int viewport_height;
- unsigned int viewport_y_y;
- unsigned int viewport_y_c;
- unsigned int viewport_width_c;
- unsigned int viewport_height_c;
- unsigned int data_pitch;
- unsigned int data_pitch_c;
- unsigned int meta_pitch;
- unsigned int meta_pitch_c;
- unsigned int cur0_src_width;
- int cur0_bpp;
- unsigned int cur1_src_width;
- int cur1_bpp;
- int num_cursors;
- unsigned char is_hsplit;
- unsigned char dynamic_metadata_enable;
- unsigned int dynamic_metadata_lines_before_active;
- unsigned int dynamic_metadata_xmit_bytes;
- unsigned int hsplit_grp;
- unsigned char xfc_enable;
- unsigned char xfc_slave;
+struct _vcs_dpi_display_pipe_source_params_st {
+ int source_format;
+ unsigned char dcc;
+ unsigned int dcc_override;
+ unsigned int dcc_rate;
+ unsigned char dcc_use_global;
+ unsigned char vm;
+ bool gpuvm; // gpuvm enabled
+ bool hostvm; // hostvm enabled
+ bool gpuvm_levels_force_en;
+ unsigned int gpuvm_levels_force;
+ bool hostvm_levels_force_en;
+ unsigned int hostvm_levels_force;
+ int source_scan;
+ int sw_mode;
+ int macro_tile_size;
+ unsigned char is_display_sw;
+ unsigned int viewport_width;
+ unsigned int viewport_height;
+ unsigned int viewport_y_y;
+ unsigned int viewport_y_c;
+ unsigned int viewport_width_c;
+ unsigned int viewport_height_c;
+ unsigned int data_pitch;
+ unsigned int data_pitch_c;
+ unsigned int meta_pitch;
+ unsigned int meta_pitch_c;
+ unsigned int cur0_src_width;
+ int cur0_bpp;
+ unsigned int cur1_src_width;
+ int cur1_bpp;
+ int num_cursors;
+ unsigned char is_hsplit;
+ unsigned char dynamic_metadata_enable;
+ unsigned int dynamic_metadata_lines_before_active;
+ unsigned int dynamic_metadata_xmit_bytes;
+ unsigned int hsplit_grp;
+ unsigned char xfc_enable;
+ unsigned char xfc_slave;
struct _vcs_dpi_display_xfc_params_st xfc_params;
};
struct writeback_st {
@@ -215,338 +237,339 @@ struct writeback_st {
int wb_vtaps_luma;
int wb_htaps_chroma;
int wb_vtaps_chroma;
- int wb_hratio;
- int wb_vratio;
-};
-
-struct _vcs_dpi_display_output_params_st {
- int dp_lanes;
- int output_bpp;
- int dsc_enable;
- int wb_enable;
- int opp_input_bpc;
- int output_type;
- int output_format;
- int output_standard;
- int dsc_slices;
+ double wb_hratio;
+ double wb_vratio;
+};
+
+struct _vcs_dpi_display_output_params_st {
+ int dp_lanes;
+ int output_bpp;
+ int dsc_enable;
+ int wb_enable;
+ int num_active_wb;
+ int output_bpc;
+ int output_type;
+ int output_format;
+ int output_standard;
+ int dsc_slices;
struct writeback_st wb;
};
-struct _vcs_dpi_display_bandwidth_st {
- double total_bw_consumed_gbps;
- double guaranteed_urgent_return_bw_gbps;
-};
-
-struct _vcs_dpi_scaler_ratio_depth_st {
- double hscl_ratio;
- double vscl_ratio;
- double hscl_ratio_c;
- double vscl_ratio_c;
- double vinit;
- double vinit_c;
- double vinit_bot;
- double vinit_bot_c;
- int lb_depth;
- int scl_enable;
-};
-
-struct _vcs_dpi_scaler_taps_st {
- unsigned int htaps;
- unsigned int vtaps;
- unsigned int htaps_c;
- unsigned int vtaps_c;
-};
-
-struct _vcs_dpi_display_pipe_dest_params_st {
- unsigned int recout_width;
- unsigned int recout_height;
- unsigned int full_recout_width;
- unsigned int full_recout_height;
- unsigned int hblank_start;
- unsigned int hblank_end;
- unsigned int vblank_start;
- unsigned int vblank_end;
- unsigned int htotal;
- unsigned int vtotal;
- unsigned int vactive;
- unsigned int hactive;
- unsigned int vstartup_start;
- unsigned int vupdate_offset;
- unsigned int vupdate_width;
- unsigned int vready_offset;
- unsigned char interlaced;
- unsigned char underscan;
- double pixel_rate_mhz;
- unsigned char synchronized_vblank_all_planes;
- unsigned char otg_inst;
- unsigned char odm_split_cnt;
- unsigned char odm_combine;
-};
-
-struct _vcs_dpi_display_pipe_params_st {
- display_pipe_source_params_st src;
- display_pipe_dest_params_st dest;
- scaler_ratio_depth_st scale_ratio_depth;
- scaler_taps_st scale_taps;
-};
-
-struct _vcs_dpi_display_clocks_and_cfg_st {
- int voltage;
- double dppclk_mhz;
- double refclk_mhz;
- double dispclk_mhz;
- double dcfclk_mhz;
- double socclk_mhz;
-};
-
-struct _vcs_dpi_display_e2e_pipe_params_st {
- display_pipe_params_st pipe;
- display_output_params_st dout;
- display_clocks_and_cfg_st clks_cfg;
-};
-
-struct _vcs_dpi_dchub_buffer_sizing_st {
- unsigned int swath_width_y;
- unsigned int swath_height_y;
- unsigned int swath_height_c;
- unsigned int detail_buffer_size_y;
-};
-
-struct _vcs_dpi_watermarks_perf_st {
- double stutter_eff_in_active_region_percent;
- double urgent_latency_supported_us;
- double non_urgent_latency_supported_us;
- double dram_clock_change_margin_us;
- double dram_access_eff_percent;
-};
-
-struct _vcs_dpi_cstate_pstate_watermarks_st {
- double cstate_exit_us;
- double cstate_enter_plus_exit_us;
- double pstate_change_us;
-};
-
-struct _vcs_dpi_wm_calc_pipe_params_st {
- unsigned int num_dpp;
- int voltage;
- int output_type;
- double dcfclk_mhz;
- double socclk_mhz;
- double dppclk_mhz;
- double pixclk_mhz;
- unsigned char interlace_en;
- unsigned char pte_enable;
- unsigned char dcc_enable;
- double dcc_rate;
- double bytes_per_pixel_c;
- double bytes_per_pixel_y;
- unsigned int swath_width_y;
- unsigned int swath_height_y;
- unsigned int swath_height_c;
- unsigned int det_buffer_size_y;
- double h_ratio;
- double v_ratio;
- unsigned int h_taps;
- unsigned int h_total;
- unsigned int v_total;
- unsigned int v_active;
- unsigned int e2e_index;
- double display_pipe_line_delivery_time;
- double read_bw;
- unsigned int lines_in_det_y;
- unsigned int lines_in_det_y_rounded_down_to_swath;
- double full_det_buffering_time;
- double dcfclk_deepsleep_mhz_per_plane;
-};
-
-struct _vcs_dpi_vratio_pre_st {
- double vratio_pre_l;
- double vratio_pre_c;
-};
-
-struct _vcs_dpi_display_data_rq_misc_params_st {
- unsigned int full_swath_bytes;
- unsigned int stored_swath_bytes;
- unsigned int blk256_height;
- unsigned int blk256_width;
- unsigned int req_height;
- unsigned int req_width;
-};
-
-struct _vcs_dpi_display_data_rq_sizing_params_st {
- unsigned int chunk_bytes;
- unsigned int min_chunk_bytes;
- unsigned int meta_chunk_bytes;
- unsigned int min_meta_chunk_bytes;
- unsigned int mpte_group_bytes;
- unsigned int dpte_group_bytes;
-};
-
-struct _vcs_dpi_display_data_rq_dlg_params_st {
- unsigned int swath_width_ub;
- unsigned int swath_height;
- unsigned int req_per_swath_ub;
- unsigned int meta_pte_bytes_per_frame_ub;
- unsigned int dpte_req_per_row_ub;
- unsigned int dpte_groups_per_row_ub;
- unsigned int dpte_row_height;
- unsigned int dpte_bytes_per_row_ub;
- unsigned int meta_chunks_per_row_ub;
- unsigned int meta_req_per_row_ub;
- unsigned int meta_row_height;
- unsigned int meta_bytes_per_row_ub;
-};
-
-struct _vcs_dpi_display_cur_rq_dlg_params_st {
- unsigned char enable;
- unsigned int swath_height;
- unsigned int req_per_line;
-};
-
-struct _vcs_dpi_display_rq_dlg_params_st {
- display_data_rq_dlg_params_st rq_l;
- display_data_rq_dlg_params_st rq_c;
- display_cur_rq_dlg_params_st rq_cur0;
-};
-
-struct _vcs_dpi_display_rq_sizing_params_st {
- display_data_rq_sizing_params_st rq_l;
- display_data_rq_sizing_params_st rq_c;
-};
-
-struct _vcs_dpi_display_rq_misc_params_st {
- display_data_rq_misc_params_st rq_l;
- display_data_rq_misc_params_st rq_c;
-};
-
-struct _vcs_dpi_display_rq_params_st {
- unsigned char yuv420;
- unsigned char yuv420_10bpc;
- display_rq_misc_params_st misc;
- display_rq_sizing_params_st sizing;
- display_rq_dlg_params_st dlg;
-};
-
-struct _vcs_dpi_display_dlg_regs_st {
- unsigned int refcyc_h_blank_end;
- unsigned int dlg_vblank_end;
- unsigned int min_dst_y_next_start;
- unsigned int refcyc_per_htotal;
- unsigned int refcyc_x_after_scaler;
- unsigned int dst_y_after_scaler;
- unsigned int dst_y_prefetch;
- unsigned int dst_y_per_vm_vblank;
- unsigned int dst_y_per_row_vblank;
- unsigned int dst_y_per_vm_flip;
- unsigned int dst_y_per_row_flip;
- unsigned int ref_freq_to_pix_freq;
- unsigned int vratio_prefetch;
- unsigned int vratio_prefetch_c;
- unsigned int refcyc_per_pte_group_vblank_l;
- unsigned int refcyc_per_pte_group_vblank_c;
- unsigned int refcyc_per_meta_chunk_vblank_l;
- unsigned int refcyc_per_meta_chunk_vblank_c;
- unsigned int refcyc_per_pte_group_flip_l;
- unsigned int refcyc_per_pte_group_flip_c;
- unsigned int refcyc_per_meta_chunk_flip_l;
- unsigned int refcyc_per_meta_chunk_flip_c;
- unsigned int dst_y_per_pte_row_nom_l;
- unsigned int dst_y_per_pte_row_nom_c;
- unsigned int refcyc_per_pte_group_nom_l;
- unsigned int refcyc_per_pte_group_nom_c;
- unsigned int dst_y_per_meta_row_nom_l;
- unsigned int dst_y_per_meta_row_nom_c;
- unsigned int refcyc_per_meta_chunk_nom_l;
- unsigned int refcyc_per_meta_chunk_nom_c;
- unsigned int refcyc_per_line_delivery_pre_l;
- unsigned int refcyc_per_line_delivery_pre_c;
- unsigned int refcyc_per_line_delivery_l;
- unsigned int refcyc_per_line_delivery_c;
- unsigned int chunk_hdl_adjust_cur0;
- unsigned int chunk_hdl_adjust_cur1;
- unsigned int vready_after_vcount0;
- unsigned int dst_y_offset_cur0;
- unsigned int dst_y_offset_cur1;
- unsigned int xfc_reg_transfer_delay;
- unsigned int xfc_reg_precharge_delay;
- unsigned int xfc_reg_remote_surface_flip_latency;
- unsigned int xfc_reg_prefetch_margin;
- unsigned int dst_y_delta_drq_limit;
-};
-
-struct _vcs_dpi_display_ttu_regs_st {
- unsigned int qos_level_low_wm;
- unsigned int qos_level_high_wm;
- unsigned int min_ttu_vblank;
- unsigned int qos_level_flip;
- unsigned int refcyc_per_req_delivery_l;
- unsigned int refcyc_per_req_delivery_c;
- unsigned int refcyc_per_req_delivery_cur0;
- unsigned int refcyc_per_req_delivery_cur1;
- unsigned int refcyc_per_req_delivery_pre_l;
- unsigned int refcyc_per_req_delivery_pre_c;
- unsigned int refcyc_per_req_delivery_pre_cur0;
- unsigned int refcyc_per_req_delivery_pre_cur1;
- unsigned int qos_level_fixed_l;
- unsigned int qos_level_fixed_c;
- unsigned int qos_level_fixed_cur0;
- unsigned int qos_level_fixed_cur1;
- unsigned int qos_ramp_disable_l;
- unsigned int qos_ramp_disable_c;
- unsigned int qos_ramp_disable_cur0;
- unsigned int qos_ramp_disable_cur1;
-};
-
-struct _vcs_dpi_display_data_rq_regs_st {
- unsigned int chunk_size;
- unsigned int min_chunk_size;
- unsigned int meta_chunk_size;
- unsigned int min_meta_chunk_size;
- unsigned int dpte_group_size;
- unsigned int mpte_group_size;
- unsigned int swath_height;
- unsigned int pte_row_height_linear;
-};
-
-struct _vcs_dpi_display_rq_regs_st {
- display_data_rq_regs_st rq_regs_l;
- display_data_rq_regs_st rq_regs_c;
- unsigned int drq_expansion_mode;
- unsigned int prq_expansion_mode;
- unsigned int mrq_expansion_mode;
- unsigned int crq_expansion_mode;
- unsigned int plane1_base_address;
-};
-
-struct _vcs_dpi_display_dlg_sys_params_st {
- double t_mclk_wm_us;
- double t_urg_wm_us;
- double t_sr_wm_us;
- double t_extra_us;
- double mem_trip_us;
- double t_srx_delay_us;
- double deepsleep_dcfclk_mhz;
- double total_flip_bw;
- unsigned int total_flip_bytes;
-};
-
-struct _vcs_dpi_display_dlg_prefetch_param_st {
- double prefetch_bw;
- unsigned int flip_bytes;
-};
-
-struct _vcs_dpi_display_pipe_clock_st {
- double dcfclk_mhz;
- double dispclk_mhz;
- double socclk_mhz;
- double dscclk_mhz[6];
- double dppclk_mhz[6];
-};
-
-struct _vcs_dpi_display_arb_params_st {
- int max_req_outstanding;
- int min_req_outstanding;
- int sat_level_us;
+struct _vcs_dpi_display_bandwidth_st {
+ double total_bw_consumed_gbps;
+ double guaranteed_urgent_return_bw_gbps;
+};
+
+struct _vcs_dpi_scaler_ratio_depth_st {
+ double hscl_ratio;
+ double vscl_ratio;
+ double hscl_ratio_c;
+ double vscl_ratio_c;
+ double vinit;
+ double vinit_c;
+ double vinit_bot;
+ double vinit_bot_c;
+ int lb_depth;
+ int scl_enable;
+};
+
+struct _vcs_dpi_scaler_taps_st {
+ unsigned int htaps;
+ unsigned int vtaps;
+ unsigned int htaps_c;
+ unsigned int vtaps_c;
+};
+
+struct _vcs_dpi_display_pipe_dest_params_st {
+ unsigned int recout_width;
+ unsigned int recout_height;
+ unsigned int full_recout_width;
+ unsigned int full_recout_height;
+ unsigned int hblank_start;
+ unsigned int hblank_end;
+ unsigned int vblank_start;
+ unsigned int vblank_end;
+ unsigned int htotal;
+ unsigned int vtotal;
+ unsigned int vactive;
+ unsigned int hactive;
+ unsigned int vstartup_start;
+ unsigned int vupdate_offset;
+ unsigned int vupdate_width;
+ unsigned int vready_offset;
+ unsigned char interlaced;
+ unsigned char underscan;
+ double pixel_rate_mhz;
+ unsigned char synchronized_vblank_all_planes;
+ unsigned char otg_inst;
+ unsigned char odm_split_cnt;
+ unsigned char odm_combine;
+};
+
+struct _vcs_dpi_display_pipe_params_st {
+ display_pipe_source_params_st src;
+ display_pipe_dest_params_st dest;
+ scaler_ratio_depth_st scale_ratio_depth;
+ scaler_taps_st scale_taps;
+};
+
+struct _vcs_dpi_display_clocks_and_cfg_st {
+ int voltage;
+ double dppclk_mhz;
+ double refclk_mhz;
+ double dispclk_mhz;
+ double dcfclk_mhz;
+ double socclk_mhz;
+};
+
+struct _vcs_dpi_display_e2e_pipe_params_st {
+ display_pipe_params_st pipe;
+ display_output_params_st dout;
+ display_clocks_and_cfg_st clks_cfg;
+};
+
+struct _vcs_dpi_dchub_buffer_sizing_st {
+ unsigned int swath_width_y;
+ unsigned int swath_height_y;
+ unsigned int swath_height_c;
+ unsigned int detail_buffer_size_y;
+};
+
+struct _vcs_dpi_watermarks_perf_st {
+ double stutter_eff_in_active_region_percent;
+ double urgent_latency_supported_us;
+ double non_urgent_latency_supported_us;
+ double dram_clock_change_margin_us;
+ double dram_access_eff_percent;
+};
+
+struct _vcs_dpi_cstate_pstate_watermarks_st {
+ double cstate_exit_us;
+ double cstate_enter_plus_exit_us;
+ double pstate_change_us;
+};
+
+struct _vcs_dpi_wm_calc_pipe_params_st {
+ unsigned int num_dpp;
+ int voltage;
+ int output_type;
+ double dcfclk_mhz;
+ double socclk_mhz;
+ double dppclk_mhz;
+ double pixclk_mhz;
+ unsigned char interlace_en;
+ unsigned char pte_enable;
+ unsigned char dcc_enable;
+ double dcc_rate;
+ double bytes_per_pixel_c;
+ double bytes_per_pixel_y;
+ unsigned int swath_width_y;
+ unsigned int swath_height_y;
+ unsigned int swath_height_c;
+ unsigned int det_buffer_size_y;
+ double h_ratio;
+ double v_ratio;
+ unsigned int h_taps;
+ unsigned int h_total;
+ unsigned int v_total;
+ unsigned int v_active;
+ unsigned int e2e_index;
+ double display_pipe_line_delivery_time;
+ double read_bw;
+ unsigned int lines_in_det_y;
+ unsigned int lines_in_det_y_rounded_down_to_swath;
+ double full_det_buffering_time;
+ double dcfclk_deepsleep_mhz_per_plane;
+};
+
+struct _vcs_dpi_vratio_pre_st {
+ double vratio_pre_l;
+ double vratio_pre_c;
+};
+
+struct _vcs_dpi_display_data_rq_misc_params_st {
+ unsigned int full_swath_bytes;
+ unsigned int stored_swath_bytes;
+ unsigned int blk256_height;
+ unsigned int blk256_width;
+ unsigned int req_height;
+ unsigned int req_width;
+};
+
+struct _vcs_dpi_display_data_rq_sizing_params_st {
+ unsigned int chunk_bytes;
+ unsigned int min_chunk_bytes;
+ unsigned int meta_chunk_bytes;
+ unsigned int min_meta_chunk_bytes;
+ unsigned int mpte_group_bytes;
+ unsigned int dpte_group_bytes;
+};
+
+struct _vcs_dpi_display_data_rq_dlg_params_st {
+ unsigned int swath_width_ub;
+ unsigned int swath_height;
+ unsigned int req_per_swath_ub;
+ unsigned int meta_pte_bytes_per_frame_ub;
+ unsigned int dpte_req_per_row_ub;
+ unsigned int dpte_groups_per_row_ub;
+ unsigned int dpte_row_height;
+ unsigned int dpte_bytes_per_row_ub;
+ unsigned int meta_chunks_per_row_ub;
+ unsigned int meta_req_per_row_ub;
+ unsigned int meta_row_height;
+ unsigned int meta_bytes_per_row_ub;
+};
+
+struct _vcs_dpi_display_cur_rq_dlg_params_st {
+ unsigned char enable;
+ unsigned int swath_height;
+ unsigned int req_per_line;
+};
+
+struct _vcs_dpi_display_rq_dlg_params_st {
+ display_data_rq_dlg_params_st rq_l;
+ display_data_rq_dlg_params_st rq_c;
+ display_cur_rq_dlg_params_st rq_cur0;
+};
+
+struct _vcs_dpi_display_rq_sizing_params_st {
+ display_data_rq_sizing_params_st rq_l;
+ display_data_rq_sizing_params_st rq_c;
+};
+
+struct _vcs_dpi_display_rq_misc_params_st {
+ display_data_rq_misc_params_st rq_l;
+ display_data_rq_misc_params_st rq_c;
+};
+
+struct _vcs_dpi_display_rq_params_st {
+ unsigned char yuv420;
+ unsigned char yuv420_10bpc;
+ display_rq_misc_params_st misc;
+ display_rq_sizing_params_st sizing;
+ display_rq_dlg_params_st dlg;
+};
+
+struct _vcs_dpi_display_dlg_regs_st {
+ unsigned int refcyc_h_blank_end;
+ unsigned int dlg_vblank_end;
+ unsigned int min_dst_y_next_start;
+ unsigned int refcyc_per_htotal;
+ unsigned int refcyc_x_after_scaler;
+ unsigned int dst_y_after_scaler;
+ unsigned int dst_y_prefetch;
+ unsigned int dst_y_per_vm_vblank;
+ unsigned int dst_y_per_row_vblank;
+ unsigned int dst_y_per_vm_flip;
+ unsigned int dst_y_per_row_flip;
+ unsigned int ref_freq_to_pix_freq;
+ unsigned int vratio_prefetch;
+ unsigned int vratio_prefetch_c;
+ unsigned int refcyc_per_pte_group_vblank_l;
+ unsigned int refcyc_per_pte_group_vblank_c;
+ unsigned int refcyc_per_meta_chunk_vblank_l;
+ unsigned int refcyc_per_meta_chunk_vblank_c;
+ unsigned int refcyc_per_pte_group_flip_l;
+ unsigned int refcyc_per_pte_group_flip_c;
+ unsigned int refcyc_per_meta_chunk_flip_l;
+ unsigned int refcyc_per_meta_chunk_flip_c;
+ unsigned int dst_y_per_pte_row_nom_l;
+ unsigned int dst_y_per_pte_row_nom_c;
+ unsigned int refcyc_per_pte_group_nom_l;
+ unsigned int refcyc_per_pte_group_nom_c;
+ unsigned int dst_y_per_meta_row_nom_l;
+ unsigned int dst_y_per_meta_row_nom_c;
+ unsigned int refcyc_per_meta_chunk_nom_l;
+ unsigned int refcyc_per_meta_chunk_nom_c;
+ unsigned int refcyc_per_line_delivery_pre_l;
+ unsigned int refcyc_per_line_delivery_pre_c;
+ unsigned int refcyc_per_line_delivery_l;
+ unsigned int refcyc_per_line_delivery_c;
+ unsigned int chunk_hdl_adjust_cur0;
+ unsigned int chunk_hdl_adjust_cur1;
+ unsigned int vready_after_vcount0;
+ unsigned int dst_y_offset_cur0;
+ unsigned int dst_y_offset_cur1;
+ unsigned int xfc_reg_transfer_delay;
+ unsigned int xfc_reg_precharge_delay;
+ unsigned int xfc_reg_remote_surface_flip_latency;
+ unsigned int xfc_reg_prefetch_margin;
+ unsigned int dst_y_delta_drq_limit;
+};
+
+struct _vcs_dpi_display_ttu_regs_st {
+ unsigned int qos_level_low_wm;
+ unsigned int qos_level_high_wm;
+ unsigned int min_ttu_vblank;
+ unsigned int qos_level_flip;
+ unsigned int refcyc_per_req_delivery_l;
+ unsigned int refcyc_per_req_delivery_c;
+ unsigned int refcyc_per_req_delivery_cur0;
+ unsigned int refcyc_per_req_delivery_cur1;
+ unsigned int refcyc_per_req_delivery_pre_l;
+ unsigned int refcyc_per_req_delivery_pre_c;
+ unsigned int refcyc_per_req_delivery_pre_cur0;
+ unsigned int refcyc_per_req_delivery_pre_cur1;
+ unsigned int qos_level_fixed_l;
+ unsigned int qos_level_fixed_c;
+ unsigned int qos_level_fixed_cur0;
+ unsigned int qos_level_fixed_cur1;
+ unsigned int qos_ramp_disable_l;
+ unsigned int qos_ramp_disable_c;
+ unsigned int qos_ramp_disable_cur0;
+ unsigned int qos_ramp_disable_cur1;
+};
+
+struct _vcs_dpi_display_data_rq_regs_st {
+ unsigned int chunk_size;
+ unsigned int min_chunk_size;
+ unsigned int meta_chunk_size;
+ unsigned int min_meta_chunk_size;
+ unsigned int dpte_group_size;
+ unsigned int mpte_group_size;
+ unsigned int swath_height;
+ unsigned int pte_row_height_linear;
+};
+
+struct _vcs_dpi_display_rq_regs_st {
+ display_data_rq_regs_st rq_regs_l;
+ display_data_rq_regs_st rq_regs_c;
+ unsigned int drq_expansion_mode;
+ unsigned int prq_expansion_mode;
+ unsigned int mrq_expansion_mode;
+ unsigned int crq_expansion_mode;
+ unsigned int plane1_base_address;
+};
+
+struct _vcs_dpi_display_dlg_sys_params_st {
+ double t_mclk_wm_us;
+ double t_urg_wm_us;
+ double t_sr_wm_us;
+ double t_extra_us;
+ double mem_trip_us;
+ double t_srx_delay_us;
+ double deepsleep_dcfclk_mhz;
+ double total_flip_bw;
+ unsigned int total_flip_bytes;
+};
+
+struct _vcs_dpi_display_dlg_prefetch_param_st {
+ double prefetch_bw;
+ unsigned int flip_bytes;
+};
+
+struct _vcs_dpi_display_pipe_clock_st {
+ double dcfclk_mhz;
+ double dispclk_mhz;
+ double socclk_mhz;
+ double dscclk_mhz[6];
+ double dppclk_mhz[6];
+};
+
+struct _vcs_dpi_display_arb_params_st {
+ int max_req_outstanding;
+ int min_req_outstanding;
+ int sat_level_us;
};
#endif /*__DISPLAY_MODE_STRUCTS_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
index f9cf08357989..e8ce08567cd8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
@@ -35,6 +35,16 @@ static inline double dml_min(double a, double b)
return (double) dcn_bw_min2(a, b);
}
+static inline double dml_min3(double a, double b, double c)
+{
+ return dml_min(dml_min(a, b), c);
+}
+
+static inline double dml_min4(double a, double b, double c, double d)
+{
+ return dml_min(dml_min(a, b), dml_min(c, d));
+}
+
static inline double dml_max(double a, double b)
{
return (double) dcn_bw_max2(a, b);
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
index 87b580fa4bc9..0caee3523017 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
@@ -75,6 +75,7 @@ bool dal_hw_factory_init(
return true;
case DCE_VERSION_11_0:
case DCE_VERSION_11_2:
+ case DCE_VERSION_11_22:
dal_hw_factory_dce110_init(factory);
return true;
case DCE_VERSION_12_0:
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
index 0ae8ace25739..55c707488541 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
@@ -72,6 +72,7 @@ bool dal_hw_translate_init(
case DCE_VERSION_10_0:
case DCE_VERSION_11_0:
case DCE_VERSION_11_2:
+ case DCE_VERSION_11_22:
dal_hw_translate_dce110_init(translate);
return true;
case DCE_VERSION_12_0:
diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c
index abd0095ced30..b7256f595052 100644
--- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c
+++ b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c
@@ -527,7 +527,7 @@ static void construct(
REG_GET(MICROSECOND_TIME_BASE_DIV, XTAL_REF_DIV, &xtal_ref_div);
if (xtal_ref_div == 0) {
- DC_LOG_WARNING("Invalid base timer divider\n",
+ DC_LOG_WARNING("Invalid base timer divider [%s]\n",
__func__);
xtal_ref_div = 2;
}
diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c
index 5cbf6626b8d4..14dc8c94d862 100644
--- a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c
+++ b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c
@@ -83,6 +83,7 @@ struct i2caux *dal_i2caux_create(
case DCE_VERSION_8_3:
return dal_i2caux_dce80_create(ctx);
case DCE_VERSION_11_2:
+ case DCE_VERSION_11_22:
return dal_i2caux_dce112_create(ctx);
case DCE_VERSION_11_0:
return dal_i2caux_dce110_create(ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 8c51ad70cace..a94942d4e66b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -95,11 +95,6 @@ struct resource_funcs {
struct link_encoder *(*link_enc_create)(
const struct encoder_init_data *init);
- enum dc_status (*validate_guaranteed)(
- struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_state *context);
-
bool (*validate_bandwidth)(
struct dc *dc,
struct dc_state *context);
@@ -250,6 +245,7 @@ struct dce_bw_output {
bool all_displays_in_sync;
struct dce_watermarks urgent_wm_ns[MAX_PIPES];
struct dce_watermarks stutter_exit_wm_ns[MAX_PIPES];
+ struct dce_watermarks stutter_entry_wm_ns[MAX_PIPES];
struct dce_watermarks nbp_state_change_wm_ns[MAX_PIPES];
int sclk_khz;
int sclk_deep_sleep_khz;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h
index a9bfe9ff8ce6..eece165206f9 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h
@@ -42,6 +42,8 @@ enum bw_calcs_version {
BW_CALCS_VERSION_CARRIZO,
BW_CALCS_VERSION_POLARIS10,
BW_CALCS_VERSION_POLARIS11,
+ BW_CALCS_VERSION_POLARIS12,
+ BW_CALCS_VERSION_VEGAM,
BW_CALCS_VERSION_STONEY,
BW_CALCS_VERSION_VEGA10
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
new file mode 100644
index 000000000000..02f757dd70d4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DAL_DCHUBBUB_H__
+#define __DAL_DCHUBBUB_H__
+
+
+enum dcc_control {
+ dcc_control__256_256_xxx,
+ dcc_control__128_128_xxx,
+ dcc_control__256_64_64,
+};
+
+enum segment_order {
+ segment_order__na,
+ segment_order__contiguous,
+ segment_order__non_contiguous,
+};
+
+
+struct hubbub_funcs {
+ void (*update_dchub)(
+ struct hubbub *hubbub,
+ struct dchub_init_data *dh_data);
+
+ bool (*get_dcc_compression_cap)(struct hubbub *hubbub,
+ const struct dc_dcc_surface_param *input,
+ struct dc_surface_dcc_cap *output);
+
+ bool (*dcc_support_swizzle)(
+ enum swizzle_mode_values swizzle,
+ unsigned int bytes_per_element,
+ enum segment_order *segment_order_horz,
+ enum segment_order *segment_order_vert);
+
+ bool (*dcc_support_pixel_format)(
+ enum surface_pixel_format format,
+ unsigned int *bytes_per_element);
+};
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
index 99995608b620..582458f028f8 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
@@ -44,7 +44,23 @@ struct dpp_grph_csc_adjustment {
enum graphics_gamut_adjust_type gamut_adjust_type;
};
+struct dcn_dpp_state {
+ uint32_t igam_lut_mode;
+ uint32_t igam_input_format;
+ uint32_t dgam_lut_mode;
+ uint32_t rgam_lut_mode;
+ uint32_t gamut_remap_mode;
+ uint32_t gamut_remap_c11_c12;
+ uint32_t gamut_remap_c13_c14;
+ uint32_t gamut_remap_c21_c22;
+ uint32_t gamut_remap_c23_c24;
+ uint32_t gamut_remap_c31_c32;
+ uint32_t gamut_remap_c33_c34;
+};
+
struct dpp_funcs {
+ void (*dpp_read_state)(struct dpp *dpp, struct dcn_dpp_state *s);
+
void (*dpp_reset)(struct dpp *dpp);
void (*dpp_set_scaler)(struct dpp *dpp,
@@ -117,7 +133,7 @@ struct dpp_funcs {
struct dpp *dpp_base,
enum surface_pixel_format format,
enum expansion_mode mode,
- struct csc_transform input_csc_color_matrix,
+ struct dc_csc_transform input_csc_color_matrix,
enum dc_color_space input_color_space);
void (*dpp_full_bypass)(struct dpp *dpp_base);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
index 9ced254e652c..97df82cddf82 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
@@ -56,7 +56,6 @@ struct hubp {
bool power_gated;
};
-
struct hubp_funcs {
void (*hubp_setup)(
struct hubp *hubp,
@@ -121,6 +120,9 @@ struct hubp_funcs {
void (*hubp_clk_cntl)(struct hubp *hubp, bool enable);
void (*hubp_vtg_sel)(struct hubp *hubp, uint32_t otg_inst);
+ void (*hubp_read_state)(struct hubp *hubp);
+ void (*hubp_disable_control)(struct hubp *hubp, bool disable_hubp);
+ unsigned int (*hubp_get_underflow_status)(struct hubp *hubp);
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
index b22158190262..cf7433ebf91a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
@@ -140,11 +140,6 @@ enum opp_regamma {
OPP_REGAMMA_USER
};
-struct csc_transform {
- uint16_t matrix[12];
- bool enable_adjustment;
-};
-
struct dc_bias_and_scale {
uint16_t scale_red;
uint16_t bias_red;
@@ -191,4 +186,9 @@ enum controller_dp_test_pattern {
CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA
};
+enum dc_lut_mode {
+ LUT_BYPASS,
+ LUT_RAM_A,
+ LUT_RAM_B
+};
#endif /* __DAL_HW_SHARED_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h
index 2109eac20a3d..b2fa4c4cd920 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h
@@ -87,7 +87,7 @@ struct ipp_funcs {
struct input_pixel_processor *ipp,
enum surface_pixel_format format,
enum expansion_mode mode,
- struct csc_transform input_csc_color_matrix,
+ struct dc_csc_transform input_csc_color_matrix,
enum dc_color_space input_color_space);
/* DCE function to setup IPP. TODO: see if we can consolidate to setup */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index 54d8a1386142..cf6df2e7beb2 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -149,6 +149,7 @@ struct link_encoder_funcs {
bool connect);
void (*enable_hpd)(struct link_encoder *enc);
void (*disable_hpd)(struct link_encoder *enc);
+ bool (*is_dig_enabled)(struct link_encoder *enc);
void (*destroy)(struct link_encoder **enc);
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
index 3e1e7e6a8792..47f1dc5a43b7 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
@@ -104,6 +104,7 @@ struct mem_input_funcs {
struct mem_input *mem_input,
struct dce_watermarks nbp,
struct dce_watermarks stutter,
+ struct dce_watermarks stutter_enter,
struct dce_watermarks urgent,
uint32_t total_dest_line_time_ns);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
index 23a8d5e53a89..caf74e3c836f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
@@ -105,7 +105,24 @@ struct mpc {
struct mpcc mpcc_array[MAX_MPCC];
};
+struct mpcc_state {
+ uint32_t opp_id;
+ uint32_t dpp_id;
+ uint32_t bot_mpcc_id;
+ uint32_t mode;
+ uint32_t alpha_mode;
+ uint32_t pre_multiplied_alpha;
+ uint32_t overlap_only;
+ uint32_t idle;
+ uint32_t busy;
+};
+
struct mpc_funcs {
+ void (*read_mpcc_state)(
+ struct mpc *mpc,
+ int mpcc_inst,
+ struct mpcc_state *s);
+
/*
* Insert DPP into MPC tree based on specified blending position.
* Only used for planes that are part of blending chain for OPP output
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
index b5db1692393c..cfa7ec9517ae 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
@@ -29,31 +29,40 @@
#define STREAM_ENCODER_H_
#include "audio_types.h"
+#include "hw_shared.h"
struct dc_bios;
struct dc_context;
struct dc_crtc_timing;
-struct encoder_info_packet {
- bool valid;
- uint8_t hb0;
- uint8_t hb1;
- uint8_t hb2;
- uint8_t hb3;
- uint8_t sb[32];
+enum dp_pixel_encoding_type {
+ DP_PIXEL_ENCODING_TYPE_RGB444 = 0x00000000,
+ DP_PIXEL_ENCODING_TYPE_YCBCR422 = 0x00000001,
+ DP_PIXEL_ENCODING_TYPE_YCBCR444 = 0x00000002,
+ DP_PIXEL_ENCODING_TYPE_RGB_WIDE_GAMUT = 0x00000003,
+ DP_PIXEL_ENCODING_TYPE_Y_ONLY = 0x00000004,
+ DP_PIXEL_ENCODING_TYPE_YCBCR420 = 0x00000005
+};
+
+enum dp_component_depth {
+ DP_COMPONENT_PIXEL_DEPTH_6BPC = 0x00000000,
+ DP_COMPONENT_PIXEL_DEPTH_8BPC = 0x00000001,
+ DP_COMPONENT_PIXEL_DEPTH_10BPC = 0x00000002,
+ DP_COMPONENT_PIXEL_DEPTH_12BPC = 0x00000003,
+ DP_COMPONENT_PIXEL_DEPTH_16BPC = 0x00000004
};
struct encoder_info_frame {
/* auxiliary video information */
- struct encoder_info_packet avi;
- struct encoder_info_packet gamut;
- struct encoder_info_packet vendor;
+ struct dc_info_packet avi;
+ struct dc_info_packet gamut;
+ struct dc_info_packet vendor;
/* source product description */
- struct encoder_info_packet spd;
+ struct dc_info_packet spd;
/* video stream configuration */
- struct encoder_info_packet vsc;
+ struct dc_info_packet vsc;
/* HDR Static MetaData */
- struct encoder_info_packet hdrsmd;
+ struct dc_info_packet hdrsmd;
};
struct encoder_unblank_param {
@@ -147,6 +156,7 @@ struct stream_encoder_funcs {
void (*set_avmute)(
struct stream_encoder *enc, bool enable);
+
};
#endif /* STREAM_ENCODER_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index 3217b5bf6c7a..69cb0a105300 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -140,6 +140,9 @@ struct timing_generator_funcs {
void (*program_timing)(struct timing_generator *tg,
const struct dc_crtc_timing *timing,
bool use_vbios);
+ void (*program_vline_interrupt)(struct timing_generator *optc,
+ const struct dc_crtc_timing *dc_crtc_timing,
+ unsigned long long vsync_delta);
bool (*enable_crtc)(struct timing_generator *tg);
bool (*disable_crtc)(struct timing_generator *tg);
bool (*is_counter_moving)(struct timing_generator *tg);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
index c5b3623bcbd9..fecc80c47c26 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
@@ -252,7 +252,7 @@ struct transform_funcs {
struct transform *xfm_base,
enum surface_pixel_format format,
enum expansion_mode mode,
- struct csc_transform input_csc_color_matrix,
+ struct dc_csc_transform input_csc_color_matrix,
enum dc_color_space input_color_space);
void (*ipp_full_bypass)(struct transform *xfm_base);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index e764cbad881b..63fc6c499789 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -32,6 +32,8 @@
#include "inc/hw/link_encoder.h"
#include "core_status.h"
+#define EDP_BACKLIGHT_RAMP_DISABLE_LEVEL 0xFFFFFFFF
+
enum pipe_gating_control {
PIPE_GATING_CONTROL_DISABLE = 0,
PIPE_GATING_CONTROL_ENABLE,
@@ -63,6 +65,7 @@ struct dchub_init_data;
struct dc_static_screen_events;
struct resource_pool;
struct resource_context;
+struct stream_resource;
struct hw_sequencer_funcs {
@@ -80,11 +83,6 @@ struct hw_sequencer_funcs {
int num_planes,
struct dc_state *context);
- void (*set_plane_config)(
- const struct dc *dc,
- struct pipe_ctx *pipe_ctx,
- struct resource_context *res_ctx);
-
void (*program_gamut_remap)(
struct pipe_ctx *pipe_ctx);
@@ -93,6 +91,12 @@ struct hw_sequencer_funcs {
enum dc_color_space colorspace,
uint16_t *matrix);
+ void (*program_output_csc)(struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ enum dc_color_space colorspace,
+ uint16_t *matrix,
+ int opp_id);
+
void (*update_plane_addr)(
const struct dc *dc,
struct pipe_ctx *pipe_ctx);
@@ -154,6 +158,11 @@ struct hw_sequencer_funcs {
struct dc *dc,
struct pipe_ctx *pipe,
bool lock);
+ void (*blank_pixel_data)(
+ struct dc *dc,
+ struct stream_resource *stream_res,
+ struct dc_stream_state *stream,
+ bool blank);
void (*set_bandwidth)(
struct dc *dc,
@@ -169,7 +178,7 @@ struct hw_sequencer_funcs {
void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx,
int num_pipes, const struct dc_static_screen_events *events);
- enum dc_status (*prog_pixclk_crtc_otg)(
+ enum dc_status (*enable_stream_timing)(
struct pipe_ctx *pipe_ctx,
struct dc_state *context,
struct dc *dc);
@@ -201,6 +210,7 @@ struct hw_sequencer_funcs {
void (*set_cursor_position)(struct pipe_ctx *pipe);
void (*set_cursor_attribute)(struct pipe_ctx *pipe);
+
};
void color_space_to_black_color(
diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
index 77eb72874e90..3306e7b0b3e3 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
@@ -183,6 +183,36 @@
FN(reg_name, f4), v4, \
FN(reg_name, f5), v5)
+#define REG_GET_6(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6) \
+ generic_reg_get6(CTX, REG(reg_name), \
+ FN(reg_name, f1), v1, \
+ FN(reg_name, f2), v2, \
+ FN(reg_name, f3), v3, \
+ FN(reg_name, f4), v4, \
+ FN(reg_name, f5), v5, \
+ FN(reg_name, f6), v6)
+
+#define REG_GET_7(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6, f7, v7) \
+ generic_reg_get7(CTX, REG(reg_name), \
+ FN(reg_name, f1), v1, \
+ FN(reg_name, f2), v2, \
+ FN(reg_name, f3), v3, \
+ FN(reg_name, f4), v4, \
+ FN(reg_name, f5), v5, \
+ FN(reg_name, f6), v6, \
+ FN(reg_name, f7), v7)
+
+#define REG_GET_8(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6, f7, v7, f8, v8) \
+ generic_reg_get8(CTX, REG(reg_name), \
+ FN(reg_name, f1), v1, \
+ FN(reg_name, f2), v2, \
+ FN(reg_name, f3), v3, \
+ FN(reg_name, f4), v4, \
+ FN(reg_name, f5), v5, \
+ FN(reg_name, f6), v6, \
+ FN(reg_name, f7), v7, \
+ FN(reg_name, f8), v8)
+
/* macro to poll and wait for a register field to read back given value */
#define REG_WAIT(reg_name, field, val, delay_between_poll_us, max_try) \
@@ -389,4 +419,30 @@ uint32_t generic_reg_get5(const struct dc_context *ctx, uint32_t addr,
uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
uint8_t shift5, uint32_t mask5, uint32_t *field_value5);
+uint32_t generic_reg_get6(const struct dc_context *ctx, uint32_t addr,
+ uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+ uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+ uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+ uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+ uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+ uint8_t shift6, uint32_t mask6, uint32_t *field_value6);
+
+uint32_t generic_reg_get7(const struct dc_context *ctx, uint32_t addr,
+ uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+ uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+ uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+ uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+ uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+ uint8_t shift6, uint32_t mask6, uint32_t *field_value6,
+ uint8_t shift7, uint32_t mask7, uint32_t *field_value7);
+
+uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr,
+ uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+ uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+ uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+ uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+ uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+ uint8_t shift6, uint32_t mask6, uint32_t *field_value6,
+ uint8_t shift7, uint32_t mask7, uint32_t *field_value7,
+ uint8_t shift8, uint32_t mask8, uint32_t *field_value8);
#endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_REG_HELPER_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 5467332faf7b..640a647f4611 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -139,10 +139,6 @@ bool resource_validate_attach_surfaces(
struct dc_state *context,
const struct resource_pool *pool);
-void validate_guaranteed_copy_streams(
- struct dc_state *context,
- int max_streams);
-
void resource_validate_ctx_update_pointer_after_copy(
const struct dc_state *src_ctx,
struct dc_state *dst_ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h
index a506c2e939f5..0b5f3a278c22 100644
--- a/drivers/gpu/drm/amd/display/dc/irq_types.h
+++ b/drivers/gpu/drm/amd/display/dc/irq_types.h
@@ -26,6 +26,8 @@
#ifndef __DAL_IRQ_TYPES_H__
#define __DAL_IRQ_TYPES_H__
+#include "os_types.h"
+
struct dc_context;
typedef void (*interrupt_handler)(void *);
@@ -135,6 +137,13 @@ enum dc_irq_source {
DC_IRQ_SOURCE_VBLANK5,
DC_IRQ_SOURCE_VBLANK6,
+ DC_IRQ_SOURCE_DC1_VLINE0,
+ DC_IRQ_SOURCE_DC2_VLINE0,
+ DC_IRQ_SOURCE_DC3_VLINE0,
+ DC_IRQ_SOURCE_DC4_VLINE0,
+ DC_IRQ_SOURCE_DC5_VLINE0,
+ DC_IRQ_SOURCE_DC6_VLINE0,
+
DAL_IRQ_SOURCES_NUMBER
};
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index 9b0a04f99ac8..25029ed42d89 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -86,6 +86,7 @@
#define VI_POLARIS10_P_A0 80
#define VI_POLARIS11_M_A0 90
#define VI_POLARIS12_V_A0 100
+#define VI_VEGAM_A0 110
#define VI_UNKNOWN 0xFF
@@ -98,7 +99,9 @@
(eChipRev < VI_POLARIS11_M_A0))
#define ASIC_REV_IS_POLARIS11_M(eChipRev) ((eChipRev >= VI_POLARIS11_M_A0) && \
(eChipRev < VI_POLARIS12_V_A0))
-#define ASIC_REV_IS_POLARIS12_V(eChipRev) (eChipRev >= VI_POLARIS12_V_A0)
+#define ASIC_REV_IS_POLARIS12_V(eChipRev) ((eChipRev >= VI_POLARIS12_V_A0) && \
+ (eChipRev < VI_VEGAM_A0))
+#define ASIC_REV_IS_VEGAM(eChipRev) (eChipRev >= VI_VEGAM_A0)
/* DCE11 */
#define CZ_CARRIZO_A0 0x01
@@ -110,17 +113,19 @@
((rev >= STONEY_A0) && (rev < CZ_UNKNOWN))
/* DCE12 */
+#define AI_UNKNOWN 0xFF
#define AI_GREENLAND_P_A0 1
#define AI_GREENLAND_P_A1 2
#define AI_UNKNOWN 0xFF
#define AI_VEGA12_P_A0 20
+#define AI_VEGA20_P_A0 40
#define ASICREV_IS_GREENLAND_M(eChipRev) (eChipRev < AI_VEGA12_P_A0)
#define ASICREV_IS_GREENLAND_P(eChipRev) (eChipRev < AI_VEGA12_P_A0)
-#define ASICREV_IS_VEGA12_P(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_UNKNOWN))
-#define ASICREV_IS_VEGA12_p(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_UNKNOWN))
+#define ASICREV_IS_VEGA12_P(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_VEGA20_P_A0))
+#define ASICREV_IS_VEGA20_P(eChipRev) ((eChipRev >= AI_VEGA20_P_A0) && (eChipRev < AI_UNKNOWN))
/* DCN1_0 */
#define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */
diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h
index fa543965feb5..840142b65f8b 100644
--- a/drivers/gpu/drm/amd/display/include/dal_types.h
+++ b/drivers/gpu/drm/amd/display/include/dal_types.h
@@ -40,6 +40,7 @@ enum dce_version {
DCE_VERSION_10_0,
DCE_VERSION_11_0,
DCE_VERSION_11_2,
+ DCE_VERSION_11_22,
DCE_VERSION_12_0,
DCE_VERSION_MAX,
DCN_VERSION_1_0,
diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h
index 0de258622c12..bb0d4ebba9f0 100644
--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -26,9 +26,13 @@
#ifndef __DAL_FIXED31_32_H__
#define __DAL_FIXED31_32_H__
-#include "os_types.h"
-
#define FIXED31_32_BITS_PER_FRACTIONAL_PART 32
+#ifndef LLONG_MIN
+#define LLONG_MIN (1LL<<63)
+#endif
+#ifndef LLONG_MAX
+#define LLONG_MAX (-1LL>>1)
+#endif
/*
* @brief
@@ -44,24 +48,25 @@
*/
struct fixed31_32 {
- int64_t value;
+ long long value;
};
+
/*
* @brief
* Useful constants
*/
-static const struct fixed31_32 dal_fixed31_32_zero = { 0 };
-static const struct fixed31_32 dal_fixed31_32_epsilon = { 1LL };
-static const struct fixed31_32 dal_fixed31_32_half = { 0x80000000LL };
-static const struct fixed31_32 dal_fixed31_32_one = { 0x100000000LL };
+static const struct fixed31_32 dc_fixpt_zero = { 0 };
+static const struct fixed31_32 dc_fixpt_epsilon = { 1LL };
+static const struct fixed31_32 dc_fixpt_half = { 0x80000000LL };
+static const struct fixed31_32 dc_fixpt_one = { 0x100000000LL };
-static const struct fixed31_32 dal_fixed31_32_pi = { 13493037705LL };
-static const struct fixed31_32 dal_fixed31_32_two_pi = { 26986075409LL };
-static const struct fixed31_32 dal_fixed31_32_e = { 11674931555LL };
-static const struct fixed31_32 dal_fixed31_32_ln2 = { 2977044471LL };
-static const struct fixed31_32 dal_fixed31_32_ln2_div_2 = { 1488522236LL };
+static const struct fixed31_32 dc_fixpt_pi = { 13493037705LL };
+static const struct fixed31_32 dc_fixpt_two_pi = { 26986075409LL };
+static const struct fixed31_32 dc_fixpt_e = { 11674931555LL };
+static const struct fixed31_32 dc_fixpt_ln2 = { 2977044471LL };
+static const struct fixed31_32 dc_fixpt_ln2_div_2 = { 1488522236LL };
/*
* @brief
@@ -72,24 +77,19 @@ static const struct fixed31_32 dal_fixed31_32_ln2_div_2 = { 1488522236LL };
* @brief
* result = numerator / denominator
*/
-struct fixed31_32 dal_fixed31_32_from_fraction(
- int64_t numerator,
- int64_t denominator);
+struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator);
/*
* @brief
* result = arg
*/
-struct fixed31_32 dal_fixed31_32_from_int_nonconst(int64_t arg);
-static inline struct fixed31_32 dal_fixed31_32_from_int(int64_t arg)
+static inline struct fixed31_32 dc_fixpt_from_int(int arg)
{
- if (__builtin_constant_p(arg)) {
- struct fixed31_32 res;
- BUILD_BUG_ON((LONG_MIN > arg) || (arg > LONG_MAX));
- res.value = arg << FIXED31_32_BITS_PER_FRACTIONAL_PART;
- return res;
- } else
- return dal_fixed31_32_from_int_nonconst(arg);
+ struct fixed31_32 res;
+
+ res.value = (long long) arg << FIXED31_32_BITS_PER_FRACTIONAL_PART;
+
+ return res;
}
/*
@@ -101,7 +101,7 @@ static inline struct fixed31_32 dal_fixed31_32_from_int(int64_t arg)
* @brief
* result = -arg
*/
-static inline struct fixed31_32 dal_fixed31_32_neg(struct fixed31_32 arg)
+static inline struct fixed31_32 dc_fixpt_neg(struct fixed31_32 arg)
{
struct fixed31_32 res;
@@ -114,10 +114,10 @@ static inline struct fixed31_32 dal_fixed31_32_neg(struct fixed31_32 arg)
* @brief
* result = abs(arg) := (arg >= 0) ? arg : -arg
*/
-static inline struct fixed31_32 dal_fixed31_32_abs(struct fixed31_32 arg)
+static inline struct fixed31_32 dc_fixpt_abs(struct fixed31_32 arg)
{
if (arg.value < 0)
- return dal_fixed31_32_neg(arg);
+ return dc_fixpt_neg(arg);
else
return arg;
}
@@ -131,8 +131,7 @@ static inline struct fixed31_32 dal_fixed31_32_abs(struct fixed31_32 arg)
* @brief
* result = arg1 < arg2
*/
-static inline bool dal_fixed31_32_lt(struct fixed31_32 arg1,
- struct fixed31_32 arg2)
+static inline bool dc_fixpt_lt(struct fixed31_32 arg1, struct fixed31_32 arg2)
{
return arg1.value < arg2.value;
}
@@ -141,8 +140,7 @@ static inline bool dal_fixed31_32_lt(struct fixed31_32 arg1,
* @brief
* result = arg1 <= arg2
*/
-static inline bool dal_fixed31_32_le(struct fixed31_32 arg1,
- struct fixed31_32 arg2)
+static inline bool dc_fixpt_le(struct fixed31_32 arg1, struct fixed31_32 arg2)
{
return arg1.value <= arg2.value;
}
@@ -151,8 +149,7 @@ static inline bool dal_fixed31_32_le(struct fixed31_32 arg1,
* @brief
* result = arg1 == arg2
*/
-static inline bool dal_fixed31_32_eq(struct fixed31_32 arg1,
- struct fixed31_32 arg2)
+static inline bool dc_fixpt_eq(struct fixed31_32 arg1, struct fixed31_32 arg2)
{
return arg1.value == arg2.value;
}
@@ -161,8 +158,7 @@ static inline bool dal_fixed31_32_eq(struct fixed31_32 arg1,
* @brief
* result = min(arg1, arg2) := (arg1 <= arg2) ? arg1 : arg2
*/
-static inline struct fixed31_32 dal_fixed31_32_min(struct fixed31_32 arg1,
- struct fixed31_32 arg2)
+static inline struct fixed31_32 dc_fixpt_min(struct fixed31_32 arg1, struct fixed31_32 arg2)
{
if (arg1.value <= arg2.value)
return arg1;
@@ -174,8 +170,7 @@ static inline struct fixed31_32 dal_fixed31_32_min(struct fixed31_32 arg1,
* @brief
* result = max(arg1, arg2) := (arg1 <= arg2) ? arg2 : arg1
*/
-static inline struct fixed31_32 dal_fixed31_32_max(struct fixed31_32 arg1,
- struct fixed31_32 arg2)
+static inline struct fixed31_32 dc_fixpt_max(struct fixed31_32 arg1, struct fixed31_32 arg2)
{
if (arg1.value <= arg2.value)
return arg2;
@@ -189,14 +184,14 @@ static inline struct fixed31_32 dal_fixed31_32_max(struct fixed31_32 arg1,
* result = | arg, when min_value < arg < max_value
* | max_value, when arg >= max_value
*/
-static inline struct fixed31_32 dal_fixed31_32_clamp(
+static inline struct fixed31_32 dc_fixpt_clamp(
struct fixed31_32 arg,
struct fixed31_32 min_value,
struct fixed31_32 max_value)
{
- if (dal_fixed31_32_le(arg, min_value))
+ if (dc_fixpt_le(arg, min_value))
return min_value;
- else if (dal_fixed31_32_le(max_value, arg))
+ else if (dc_fixpt_le(max_value, arg))
return max_value;
else
return arg;
@@ -211,21 +206,30 @@ static inline struct fixed31_32 dal_fixed31_32_clamp(
* @brief
* result = arg << shift
*/
-struct fixed31_32 dal_fixed31_32_shl(
- struct fixed31_32 arg,
- uint8_t shift);
+static inline struct fixed31_32 dc_fixpt_shl(struct fixed31_32 arg, unsigned char shift)
+{
+ ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) ||
+ ((arg.value < 0) && (arg.value >= ~(LLONG_MAX >> shift))));
+
+ arg.value = arg.value << shift;
+
+ return arg;
+}
/*
* @brief
* result = arg >> shift
*/
-static inline struct fixed31_32 dal_fixed31_32_shr(
- struct fixed31_32 arg,
- uint8_t shift)
+static inline struct fixed31_32 dc_fixpt_shr(struct fixed31_32 arg, unsigned char shift)
{
- struct fixed31_32 res;
- res.value = arg.value >> shift;
- return res;
+ bool negative = arg.value < 0;
+
+ if (negative)
+ arg.value = -arg.value;
+ arg.value = arg.value >> shift;
+ if (negative)
+ arg.value = -arg.value;
+ return arg;
}
/*
@@ -237,38 +241,50 @@ static inline struct fixed31_32 dal_fixed31_32_shr(
* @brief
* result = arg1 + arg2
*/
-struct fixed31_32 dal_fixed31_32_add(
- struct fixed31_32 arg1,
- struct fixed31_32 arg2);
+static inline struct fixed31_32 dc_fixpt_add(struct fixed31_32 arg1, struct fixed31_32 arg2)
+{
+ struct fixed31_32 res;
+
+ ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) ||
+ ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value)));
+
+ res.value = arg1.value + arg2.value;
+
+ return res;
+}
/*
* @brief
* result = arg1 + arg2
*/
-static inline struct fixed31_32 dal_fixed31_32_add_int(struct fixed31_32 arg1,
- int32_t arg2)
+static inline struct fixed31_32 dc_fixpt_add_int(struct fixed31_32 arg1, int arg2)
{
- return dal_fixed31_32_add(arg1,
- dal_fixed31_32_from_int(arg2));
+ return dc_fixpt_add(arg1, dc_fixpt_from_int(arg2));
}
/*
* @brief
* result = arg1 - arg2
*/
-struct fixed31_32 dal_fixed31_32_sub(
- struct fixed31_32 arg1,
- struct fixed31_32 arg2);
+static inline struct fixed31_32 dc_fixpt_sub(struct fixed31_32 arg1, struct fixed31_32 arg2)
+{
+ struct fixed31_32 res;
+
+ ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) ||
+ ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value)));
+
+ res.value = arg1.value - arg2.value;
+
+ return res;
+}
/*
* @brief
* result = arg1 - arg2
*/
-static inline struct fixed31_32 dal_fixed31_32_sub_int(struct fixed31_32 arg1,
- int32_t arg2)
+static inline struct fixed31_32 dc_fixpt_sub_int(struct fixed31_32 arg1, int arg2)
{
- return dal_fixed31_32_sub(arg1,
- dal_fixed31_32_from_int(arg2));
+ return dc_fixpt_sub(arg1, dc_fixpt_from_int(arg2));
}
@@ -281,49 +297,40 @@ static inline struct fixed31_32 dal_fixed31_32_sub_int(struct fixed31_32 arg1,
* @brief
* result = arg1 * arg2
*/
-struct fixed31_32 dal_fixed31_32_mul(
- struct fixed31_32 arg1,
- struct fixed31_32 arg2);
+struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2);
/*
* @brief
* result = arg1 * arg2
*/
-static inline struct fixed31_32 dal_fixed31_32_mul_int(struct fixed31_32 arg1,
- int32_t arg2)
+static inline struct fixed31_32 dc_fixpt_mul_int(struct fixed31_32 arg1, int arg2)
{
- return dal_fixed31_32_mul(arg1,
- dal_fixed31_32_from_int(arg2));
+ return dc_fixpt_mul(arg1, dc_fixpt_from_int(arg2));
}
/*
* @brief
* result = square(arg) := arg * arg
*/
-struct fixed31_32 dal_fixed31_32_sqr(
- struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg);
/*
* @brief
* result = arg1 / arg2
*/
-static inline struct fixed31_32 dal_fixed31_32_div_int(struct fixed31_32 arg1,
- int64_t arg2)
+static inline struct fixed31_32 dc_fixpt_div_int(struct fixed31_32 arg1, long long arg2)
{
- return dal_fixed31_32_from_fraction(arg1.value,
- dal_fixed31_32_from_int(arg2).value);
+ return dc_fixpt_from_fraction(arg1.value, dc_fixpt_from_int(arg2).value);
}
/*
* @brief
* result = arg1 / arg2
*/
-static inline struct fixed31_32 dal_fixed31_32_div(struct fixed31_32 arg1,
- struct fixed31_32 arg2)
+static inline struct fixed31_32 dc_fixpt_div(struct fixed31_32 arg1, struct fixed31_32 arg2)
{
- return dal_fixed31_32_from_fraction(arg1.value,
- arg2.value);
+ return dc_fixpt_from_fraction(arg1.value, arg2.value);
}
/*
@@ -338,8 +345,7 @@ static inline struct fixed31_32 dal_fixed31_32_div(struct fixed31_32 arg1,
* @note
* No special actions taken in case argument is zero.
*/
-struct fixed31_32 dal_fixed31_32_recip(
- struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg);
/*
* @brief
@@ -354,8 +360,7 @@ struct fixed31_32 dal_fixed31_32_recip(
* Argument specified in radians,
* internally it's normalized to [-2pi...2pi] range.
*/
-struct fixed31_32 dal_fixed31_32_sinc(
- struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg);
/*
* @brief
@@ -365,8 +370,7 @@ struct fixed31_32 dal_fixed31_32_sinc(
* Argument specified in radians,
* internally it's normalized to [-2pi...2pi] range.
*/
-struct fixed31_32 dal_fixed31_32_sin(
- struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg);
/*
* @brief
@@ -378,8 +382,7 @@ struct fixed31_32 dal_fixed31_32_sin(
* passing arguments outside that range
* will cause incorrect result!
*/
-struct fixed31_32 dal_fixed31_32_cos(
- struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg);
/*
* @brief
@@ -393,8 +396,7 @@ struct fixed31_32 dal_fixed31_32_cos(
* @note
* Currently, function is verified for abs(arg) <= 1.
*/
-struct fixed31_32 dal_fixed31_32_exp(
- struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg);
/*
* @brief
@@ -406,8 +408,7 @@ struct fixed31_32 dal_fixed31_32_exp(
* Currently, no special actions taken
* in case of invalid argument(s). Take care!
*/
-struct fixed31_32 dal_fixed31_32_log(
- struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg);
/*
* @brief
@@ -421,9 +422,13 @@ struct fixed31_32 dal_fixed31_32_log(
* @note
* Currently, abs(arg1) should be less than 1. Take care!
*/
-struct fixed31_32 dal_fixed31_32_pow(
- struct fixed31_32 arg1,
- struct fixed31_32 arg2);
+static inline struct fixed31_32 dc_fixpt_pow(struct fixed31_32 arg1, struct fixed31_32 arg2)
+{
+ return dc_fixpt_exp(
+ dc_fixpt_mul(
+ dc_fixpt_log(arg1),
+ arg2));
+}
/*
* @brief
@@ -434,22 +439,56 @@ struct fixed31_32 dal_fixed31_32_pow(
* @brief
* result = floor(arg) := greatest integer lower than or equal to arg
*/
-int32_t dal_fixed31_32_floor(
- struct fixed31_32 arg);
+static inline int dc_fixpt_floor(struct fixed31_32 arg)
+{
+ unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value;
+
+ if (arg.value >= 0)
+ return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+ else
+ return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+}
/*
* @brief
* result = round(arg) := integer nearest to arg
*/
-int32_t dal_fixed31_32_round(
- struct fixed31_32 arg);
+static inline int dc_fixpt_round(struct fixed31_32 arg)
+{
+ unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value;
+
+ const long long summand = dc_fixpt_half.value;
+
+ ASSERT(LLONG_MAX - (long long)arg_value >= summand);
+
+ arg_value += summand;
+
+ if (arg.value >= 0)
+ return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+ else
+ return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+}
/*
* @brief
* result = ceil(arg) := lowest integer greater than or equal to arg
*/
-int32_t dal_fixed31_32_ceil(
- struct fixed31_32 arg);
+static inline int dc_fixpt_ceil(struct fixed31_32 arg)
+{
+ unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value;
+
+ const long long summand = dc_fixpt_one.value -
+ dc_fixpt_epsilon.value;
+
+ ASSERT(LLONG_MAX - (long long)arg_value >= summand);
+
+ arg_value += summand;
+
+ if (arg.value >= 0)
+ return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+ else
+ return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+}
/* the following two function are used in scaler hw programming to convert fixed
* point value to format 2 bits from integer part and 19 bits from fractional
@@ -457,20 +496,31 @@ int32_t dal_fixed31_32_ceil(
* fractional
*/
-uint32_t dal_fixed31_32_u2d19(
- struct fixed31_32 arg);
+unsigned int dc_fixpt_u2d19(struct fixed31_32 arg);
+
+unsigned int dc_fixpt_u0d19(struct fixed31_32 arg);
-uint32_t dal_fixed31_32_u0d19(
- struct fixed31_32 arg);
+unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg);
+unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg);
-uint32_t dal_fixed31_32_clamp_u0d14(
- struct fixed31_32 arg);
+int dc_fixpt_s4d19(struct fixed31_32 arg);
-uint32_t dal_fixed31_32_clamp_u0d10(
- struct fixed31_32 arg);
+static inline struct fixed31_32 dc_fixpt_truncate(struct fixed31_32 arg, unsigned int frac_bits)
+{
+ bool negative = arg.value < 0;
-int32_t dal_fixed31_32_s4d19(
- struct fixed31_32 arg);
+ if (frac_bits >= FIXED31_32_BITS_PER_FRACTIONAL_PART) {
+ ASSERT(frac_bits == FIXED31_32_BITS_PER_FRACTIONAL_PART);
+ return arg;
+ }
+
+ if (negative)
+ arg.value = -arg.value;
+ arg.value &= (~0LL) << (FIXED31_32_BITS_PER_FRACTIONAL_PART - frac_bits);
+ if (negative)
+ arg.value = -arg.value;
+ return arg;
+}
#endif
diff --git a/drivers/gpu/drm/amd/display/include/fixed32_32.h b/drivers/gpu/drm/amd/display/include/fixed32_32.h
deleted file mode 100644
index 9c70341fe026..000000000000
--- a/drivers/gpu/drm/amd/display/include/fixed32_32.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright 2012-15 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-
-#ifndef __DAL_FIXED32_32_H__
-#define __DAL_FIXED32_32_H__
-
-#include "os_types.h"
-
-struct fixed32_32 {
- uint64_t value;
-};
-
-static const struct fixed32_32 dal_fixed32_32_zero = { 0 };
-static const struct fixed32_32 dal_fixed32_32_one = { 0x100000000LL };
-static const struct fixed32_32 dal_fixed32_32_half = { 0x80000000LL };
-
-struct fixed32_32 dal_fixed32_32_from_fraction(uint32_t n, uint32_t d);
-static inline struct fixed32_32 dal_fixed32_32_from_int(uint32_t value)
-{
- struct fixed32_32 fx;
-
- fx.value = (uint64_t)value<<32;
- return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_add(
- struct fixed32_32 lhs,
- struct fixed32_32 rhs);
-struct fixed32_32 dal_fixed32_32_add_int(
- struct fixed32_32 lhs,
- uint32_t rhs);
-struct fixed32_32 dal_fixed32_32_sub(
- struct fixed32_32 lhs,
- struct fixed32_32 rhs);
-struct fixed32_32 dal_fixed32_32_sub_int(
- struct fixed32_32 lhs,
- uint32_t rhs);
-struct fixed32_32 dal_fixed32_32_mul(
- struct fixed32_32 lhs,
- struct fixed32_32 rhs);
-struct fixed32_32 dal_fixed32_32_mul_int(
- struct fixed32_32 lhs,
- uint32_t rhs);
-struct fixed32_32 dal_fixed32_32_div(
- struct fixed32_32 lhs,
- struct fixed32_32 rhs);
-struct fixed32_32 dal_fixed32_32_div_int(
- struct fixed32_32 lhs,
- uint32_t rhs);
-
-static inline struct fixed32_32 dal_fixed32_32_min(struct fixed32_32 lhs,
- struct fixed32_32 rhs)
-{
- return (lhs.value < rhs.value) ? lhs : rhs;
-}
-
-static inline struct fixed32_32 dal_fixed32_32_max(struct fixed32_32 lhs,
- struct fixed32_32 rhs)
-{
- return (lhs.value > rhs.value) ? lhs : rhs;
-}
-
-static inline bool dal_fixed32_32_gt(struct fixed32_32 lhs, struct fixed32_32 rhs)
-{
- return lhs.value > rhs.value;
-}
-
-static inline bool dal_fixed32_32_gt_int(struct fixed32_32 lhs, uint32_t rhs)
-{
- return lhs.value > ((uint64_t)rhs<<32);
-}
-
-static inline bool dal_fixed32_32_lt(struct fixed32_32 lhs, struct fixed32_32 rhs)
-{
- return lhs.value < rhs.value;
-}
-
-static inline bool dal_fixed32_32_lt_int(struct fixed32_32 lhs, uint32_t rhs)
-{
- return lhs.value < ((uint64_t)rhs<<32);
-}
-
-static inline bool dal_fixed32_32_le(struct fixed32_32 lhs, struct fixed32_32 rhs)
-{
- return lhs.value <= rhs.value;
-}
-
-static inline bool dal_fixed32_32_le_int(struct fixed32_32 lhs, uint32_t rhs)
-{
- return lhs.value <= ((uint64_t)rhs<<32);
-}
-
-static inline bool dal_fixed32_32_eq(struct fixed32_32 lhs, struct fixed32_32 rhs)
-{
- return lhs.value == rhs.value;
-}
-
-uint32_t dal_fixed32_32_ceil(struct fixed32_32 value);
-static inline uint32_t dal_fixed32_32_floor(struct fixed32_32 value)
-{
- return value.value>>32;
-}
-
-uint32_t dal_fixed32_32_round(struct fixed32_32 value);
-
-#endif
diff --git a/drivers/gpu/drm/amd/display/include/logger_interface.h b/drivers/gpu/drm/amd/display/include/logger_interface.h
index 28dee960d509..dc98d6d4b2bd 100644
--- a/drivers/gpu/drm/amd/display/include/logger_interface.h
+++ b/drivers/gpu/drm/amd/display/include/logger_interface.h
@@ -190,4 +190,13 @@ void context_clock_trace(
} \
} while (0)
+#define DISPLAY_STATS_BEGIN(entry) \
+ dm_logger_open(dc->ctx->logger, &entry, LOG_DISPLAYSTATS)
+
+#define DISPLAY_STATS(msg, ...) \
+ dm_logger_append(&log_entry, msg, ##__VA_ARGS__)
+
+#define DISPLAY_STATS_END(entry) \
+ dm_logger_close(&entry)
+
#endif /* __DAL_LOGGER_INTERFACE_H__ */
diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h
index 427796bdc14a..0a540b9897a6 100644
--- a/drivers/gpu/drm/amd/display/include/logger_types.h
+++ b/drivers/gpu/drm/amd/display/include/logger_types.h
@@ -29,39 +29,39 @@
#include "os_types.h"
#define MAX_NAME_LEN 32
-#define DC_LOG_ERROR(a, ...) dm_logger_write(DC_LOGGER, LOG_ERROR, a, ## __VA_ARGS__)
-#define DC_LOG_WARNING(a, ...) dm_logger_write(DC_LOGGER, LOG_WARNING, a, ## __VA_ARGS__)
-#define DC_LOG_DEBUG(a, ...) dm_logger_write(DC_LOGGER, LOG_DEBUG, a, ## __VA_ARGS__)
-#define DC_LOG_DC(a, ...) dm_logger_write(DC_LOGGER, LOG_DC, a, ## __VA_ARGS__)
-#define DC_LOG_DTN(a, ...) dm_logger_write(DC_LOGGER, LOG_DTN, a, ## __VA_ARGS__)
-#define DC_LOG_SURFACE(a, ...) dm_logger_write(DC_LOGGER, LOG_SURFACE, a, ## __VA_ARGS__)
-#define DC_LOG_HW_HOTPLUG(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_HOTPLUG, a, ## __VA_ARGS__)
-#define DC_LOG_HW_LINK_TRAINING(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_LINK_TRAINING, a, ## __VA_ARGS__)
-#define DC_LOG_HW_SET_MODE(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_SET_MODE, a, ## __VA_ARGS__)
-#define DC_LOG_HW_RESUME_S3(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_RESUME_S3, a, ## __VA_ARGS__)
-#define DC_LOG_HW_AUDIO(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_AUDIO, a, ## __VA_ARGS__)
-#define DC_LOG_HW_HPD_IRQ(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_HPD_IRQ, a, ## __VA_ARGS__)
-#define DC_LOG_MST(a, ...) dm_logger_write(DC_LOGGER, LOG_MST, a, ## __VA_ARGS__)
-#define DC_LOG_SCALER(a, ...) dm_logger_write(DC_LOGGER, LOG_SCALER, a, ## __VA_ARGS__)
-#define DC_LOG_BIOS(a, ...) dm_logger_write(DC_LOGGER, LOG_BIOS, a, ## __VA_ARGS__)
-#define DC_LOG_BANDWIDTH_CALCS(a, ...) dm_logger_write(DC_LOGGER, LOG_BANDWIDTH_CALCS, a, ## __VA_ARGS__)
-#define DC_LOG_BANDWIDTH_VALIDATION(a, ...) dm_logger_write(DC_LOGGER, LOG_BANDWIDTH_VALIDATION, a, ## __VA_ARGS__)
-#define DC_LOG_I2C_AUX(a, ...) dm_logger_write(DC_LOGGER, LOG_I2C_AUX, a, ## __VA_ARGS__)
-#define DC_LOG_SYNC(a, ...) dm_logger_write(DC_LOGGER, LOG_SYNC, a, ## __VA_ARGS__)
-#define DC_LOG_BACKLIGHT(a, ...) dm_logger_write(DC_LOGGER, LOG_BACKLIGHT, a, ## __VA_ARGS__)
-#define DC_LOG_FEATURE_OVERRIDE(a, ...) dm_logger_write(DC_LOGGER, LOG_FEATURE_OVERRIDE, a, ## __VA_ARGS__)
-#define DC_LOG_DETECTION_EDID_PARSER(a, ...) dm_logger_write(DC_LOGGER, LOG_DETECTION_EDID_PARSER, a, ## __VA_ARGS__)
-#define DC_LOG_DETECTION_DP_CAPS(a, ...) dm_logger_write(DC_LOGGER, LOG_DETECTION_DP_CAPS, a, ## __VA_ARGS__)
-#define DC_LOG_RESOURCE(a, ...) dm_logger_write(DC_LOGGER, LOG_RESOURCE, a, ## __VA_ARGS__)
-#define DC_LOG_DML(a, ...) dm_logger_write(DC_LOGGER, LOG_DML, a, ## __VA_ARGS__)
-#define DC_LOG_EVENT_MODE_SET(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_MODE_SET, a, ## __VA_ARGS__)
-#define DC_LOG_EVENT_DETECTION(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_DETECTION, a, ## __VA_ARGS__)
-#define DC_LOG_EVENT_LINK_TRAINING(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_LINK_TRAINING, a, ## __VA_ARGS__)
-#define DC_LOG_EVENT_LINK_LOSS(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_LINK_LOSS, a, ## __VA_ARGS__)
-#define DC_LOG_EVENT_UNDERFLOW(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_UNDERFLOW, a, ## __VA_ARGS__)
-#define DC_LOG_IF_TRACE(a, ...) dm_logger_write(DC_LOGGER, LOG_IF_TRACE, a, ## __VA_ARGS__)
-#define DC_LOG_PERF_TRACE(a, ...) dm_logger_write(DC_LOGGER, LOG_PERF_TRACE, a, ## __VA_ARGS__)
+#define DC_LOG_ERROR(...) DRM_ERROR(__VA_ARGS__)
+#define DC_LOG_WARNING(...) DRM_WARN(__VA_ARGS__)
+#define DC_LOG_DEBUG(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_DC(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_DTN(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_SURFACE(...) pr_debug("[SURFACE]:"__VA_ARGS__)
+#define DC_LOG_HW_HOTPLUG(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_HW_LINK_TRAINING(...) pr_debug("[HW_LINK_TRAINING]:"__VA_ARGS__)
+#define DC_LOG_HW_SET_MODE(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_HW_RESUME_S3(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_HW_AUDIO(...) pr_debug("[HW_AUDIO]:"__VA_ARGS__)
+#define DC_LOG_HW_HPD_IRQ(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_MST(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_SCALER(...) pr_debug("[SCALER]:"__VA_ARGS__)
+#define DC_LOG_BIOS(...) pr_debug("[BIOS]:"__VA_ARGS__)
+#define DC_LOG_BANDWIDTH_CALCS(...) pr_debug("[BANDWIDTH_CALCS]:"__VA_ARGS__)
+#define DC_LOG_BANDWIDTH_VALIDATION(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_I2C_AUX(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_SYNC(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_BACKLIGHT(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_FEATURE_OVERRIDE(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_DETECTION_EDID_PARSER(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_DETECTION_DP_CAPS(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_RESOURCE(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_DML(...) pr_debug("[DML]:"__VA_ARGS__)
+#define DC_LOG_EVENT_MODE_SET(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_EVENT_DETECTION(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_EVENT_LINK_TRAINING(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_EVENT_LINK_LOSS(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_EVENT_UNDERFLOW(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_IF_TRACE(...) pr_debug("[IF_TRACE]:"__VA_ARGS__)
+#define DC_LOG_PERF_TRACE(...) DRM_DEBUG_KMS(__VA_ARGS__)
struct dal_logger;
@@ -98,7 +98,7 @@ enum dc_log_type {
LOG_EVENT_UNDERFLOW,
LOG_IF_TRACE,
LOG_PERF_TRACE,
- LOG_PROFILING,
+ LOG_DISPLAYSTATS,
LOG_SECTION_TOTAL_COUNT
};
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index b3747a019deb..0cd111d59018 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -43,7 +43,7 @@ static bool de_pq_initialized; /* = false; */
/* one-time setup of X points */
void setup_x_points_distribution(void)
{
- struct fixed31_32 region_size = dal_fixed31_32_from_int(128);
+ struct fixed31_32 region_size = dc_fixpt_from_int(128);
int32_t segment;
uint32_t seg_offset;
uint32_t index;
@@ -53,8 +53,8 @@ void setup_x_points_distribution(void)
coordinates_x[MAX_HW_POINTS + 1].x = region_size;
for (segment = 6; segment > (6 - NUM_REGIONS); segment--) {
- region_size = dal_fixed31_32_div_int(region_size, 2);
- increment = dal_fixed31_32_div_int(region_size,
+ region_size = dc_fixpt_div_int(region_size, 2);
+ increment = dc_fixpt_div_int(region_size,
NUM_PTS_IN_REGION);
seg_offset = (segment + (NUM_REGIONS - 7)) * NUM_PTS_IN_REGION;
coordinates_x[seg_offset].x = region_size;
@@ -62,7 +62,7 @@ void setup_x_points_distribution(void)
for (index = seg_offset + 1;
index < seg_offset + NUM_PTS_IN_REGION;
index++) {
- coordinates_x[index].x = dal_fixed31_32_add
+ coordinates_x[index].x = dc_fixpt_add
(coordinates_x[index-1].x, increment);
}
}
@@ -72,63 +72,63 @@ static void compute_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
{
/* consts for PQ gamma formula. */
const struct fixed31_32 m1 =
- dal_fixed31_32_from_fraction(159301758, 1000000000);
+ dc_fixpt_from_fraction(159301758, 1000000000);
const struct fixed31_32 m2 =
- dal_fixed31_32_from_fraction(7884375, 100000);
+ dc_fixpt_from_fraction(7884375, 100000);
const struct fixed31_32 c1 =
- dal_fixed31_32_from_fraction(8359375, 10000000);
+ dc_fixpt_from_fraction(8359375, 10000000);
const struct fixed31_32 c2 =
- dal_fixed31_32_from_fraction(188515625, 10000000);
+ dc_fixpt_from_fraction(188515625, 10000000);
const struct fixed31_32 c3 =
- dal_fixed31_32_from_fraction(186875, 10000);
+ dc_fixpt_from_fraction(186875, 10000);
struct fixed31_32 l_pow_m1;
struct fixed31_32 base;
- if (dal_fixed31_32_lt(in_x, dal_fixed31_32_zero))
- in_x = dal_fixed31_32_zero;
+ if (dc_fixpt_lt(in_x, dc_fixpt_zero))
+ in_x = dc_fixpt_zero;
- l_pow_m1 = dal_fixed31_32_pow(in_x, m1);
- base = dal_fixed31_32_div(
- dal_fixed31_32_add(c1,
- (dal_fixed31_32_mul(c2, l_pow_m1))),
- dal_fixed31_32_add(dal_fixed31_32_one,
- (dal_fixed31_32_mul(c3, l_pow_m1))));
- *out_y = dal_fixed31_32_pow(base, m2);
+ l_pow_m1 = dc_fixpt_pow(in_x, m1);
+ base = dc_fixpt_div(
+ dc_fixpt_add(c1,
+ (dc_fixpt_mul(c2, l_pow_m1))),
+ dc_fixpt_add(dc_fixpt_one,
+ (dc_fixpt_mul(c3, l_pow_m1))));
+ *out_y = dc_fixpt_pow(base, m2);
}
static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
{
/* consts for dePQ gamma formula. */
const struct fixed31_32 m1 =
- dal_fixed31_32_from_fraction(159301758, 1000000000);
+ dc_fixpt_from_fraction(159301758, 1000000000);
const struct fixed31_32 m2 =
- dal_fixed31_32_from_fraction(7884375, 100000);
+ dc_fixpt_from_fraction(7884375, 100000);
const struct fixed31_32 c1 =
- dal_fixed31_32_from_fraction(8359375, 10000000);
+ dc_fixpt_from_fraction(8359375, 10000000);
const struct fixed31_32 c2 =
- dal_fixed31_32_from_fraction(188515625, 10000000);
+ dc_fixpt_from_fraction(188515625, 10000000);
const struct fixed31_32 c3 =
- dal_fixed31_32_from_fraction(186875, 10000);
+ dc_fixpt_from_fraction(186875, 10000);
struct fixed31_32 l_pow_m1;
struct fixed31_32 base, div;
- if (dal_fixed31_32_lt(in_x, dal_fixed31_32_zero))
- in_x = dal_fixed31_32_zero;
+ if (dc_fixpt_lt(in_x, dc_fixpt_zero))
+ in_x = dc_fixpt_zero;
- l_pow_m1 = dal_fixed31_32_pow(in_x,
- dal_fixed31_32_div(dal_fixed31_32_one, m2));
- base = dal_fixed31_32_sub(l_pow_m1, c1);
+ l_pow_m1 = dc_fixpt_pow(in_x,
+ dc_fixpt_div(dc_fixpt_one, m2));
+ base = dc_fixpt_sub(l_pow_m1, c1);
- if (dal_fixed31_32_lt(base, dal_fixed31_32_zero))
- base = dal_fixed31_32_zero;
+ if (dc_fixpt_lt(base, dc_fixpt_zero))
+ base = dc_fixpt_zero;
- div = dal_fixed31_32_sub(c2, dal_fixed31_32_mul(c3, l_pow_m1));
+ div = dc_fixpt_sub(c2, dc_fixpt_mul(c3, l_pow_m1));
- *out_y = dal_fixed31_32_pow(dal_fixed31_32_div(base, div),
- dal_fixed31_32_div(dal_fixed31_32_one, m1));
+ *out_y = dc_fixpt_pow(dc_fixpt_div(base, div),
+ dc_fixpt_div(dc_fixpt_one, m1));
}
/* one-time pre-compute PQ values - only for sdr_white_level 80 */
@@ -138,14 +138,14 @@ void precompute_pq(void)
struct fixed31_32 x;
const struct hw_x_point *coord_x = coordinates_x + 32;
struct fixed31_32 scaling_factor =
- dal_fixed31_32_from_fraction(80, 10000);
+ dc_fixpt_from_fraction(80, 10000);
/* pow function has problems with arguments too small */
for (i = 0; i < 32; i++)
- pq_table[i] = dal_fixed31_32_zero;
+ pq_table[i] = dc_fixpt_zero;
for (i = 32; i <= MAX_HW_POINTS; i++) {
- x = dal_fixed31_32_mul(coord_x->x, scaling_factor);
+ x = dc_fixpt_mul(coord_x->x, scaling_factor);
compute_pq(x, &pq_table[i]);
++coord_x;
}
@@ -158,7 +158,7 @@ void precompute_de_pq(void)
struct fixed31_32 y;
uint32_t begin_index, end_index;
- struct fixed31_32 scaling_factor = dal_fixed31_32_from_int(125);
+ struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
/* X points is 2^-25 to 2^7
* De-gamma X is 2^-12 to 2^0 – we are skipping first -12-(-25) = 13 regions
@@ -167,11 +167,11 @@ void precompute_de_pq(void)
end_index = begin_index + 12 * NUM_PTS_IN_REGION;
for (i = 0; i <= begin_index; i++)
- de_pq_table[i] = dal_fixed31_32_zero;
+ de_pq_table[i] = dc_fixpt_zero;
for (; i <= end_index; i++) {
compute_de_pq(coordinates_x[i].x, &y);
- de_pq_table[i] = dal_fixed31_32_mul(y, scaling_factor);
+ de_pq_table[i] = dc_fixpt_mul(y, scaling_factor);
}
for (; i <= MAX_HW_POINTS; i++)
@@ -185,25 +185,25 @@ struct dividers {
static void build_coefficients(struct gamma_coefficients *coefficients, bool is_2_4)
{
- static const int32_t numerator01[] = { 31308, 180000};
- static const int32_t numerator02[] = { 12920, 4500};
- static const int32_t numerator03[] = { 55, 99};
- static const int32_t numerator04[] = { 55, 99};
- static const int32_t numerator05[] = { 2400, 2200};
+ static const int32_t numerator01[] = { 31308, 180000};
+ static const int32_t numerator02[] = { 12920, 4500};
+ static const int32_t numerator03[] = { 55, 99};
+ static const int32_t numerator04[] = { 55, 99};
+ static const int32_t numerator05[] = { 2400, 2200};
- uint32_t i = 0;
- uint32_t index = is_2_4 == true ? 0:1;
+ uint32_t i = 0;
+ uint32_t index = is_2_4 == true ? 0:1;
do {
- coefficients->a0[i] = dal_fixed31_32_from_fraction(
+ coefficients->a0[i] = dc_fixpt_from_fraction(
numerator01[index], 10000000);
- coefficients->a1[i] = dal_fixed31_32_from_fraction(
+ coefficients->a1[i] = dc_fixpt_from_fraction(
numerator02[index], 1000);
- coefficients->a2[i] = dal_fixed31_32_from_fraction(
+ coefficients->a2[i] = dc_fixpt_from_fraction(
numerator03[index], 1000);
- coefficients->a3[i] = dal_fixed31_32_from_fraction(
+ coefficients->a3[i] = dc_fixpt_from_fraction(
numerator04[index], 1000);
- coefficients->user_gamma[i] = dal_fixed31_32_from_fraction(
+ coefficients->user_gamma[i] = dc_fixpt_from_fraction(
numerator05[index], 1000);
++i;
@@ -218,33 +218,33 @@ static struct fixed31_32 translate_from_linear_space(
struct fixed31_32 a3,
struct fixed31_32 gamma)
{
- const struct fixed31_32 one = dal_fixed31_32_from_int(1);
+ const struct fixed31_32 one = dc_fixpt_from_int(1);
- if (dal_fixed31_32_lt(one, arg))
+ if (dc_fixpt_lt(one, arg))
return one;
- if (dal_fixed31_32_le(arg, dal_fixed31_32_neg(a0)))
- return dal_fixed31_32_sub(
+ if (dc_fixpt_le(arg, dc_fixpt_neg(a0)))
+ return dc_fixpt_sub(
a2,
- dal_fixed31_32_mul(
- dal_fixed31_32_add(
+ dc_fixpt_mul(
+ dc_fixpt_add(
one,
a3),
- dal_fixed31_32_pow(
- dal_fixed31_32_neg(arg),
- dal_fixed31_32_recip(gamma))));
- else if (dal_fixed31_32_le(a0, arg))
- return dal_fixed31_32_sub(
- dal_fixed31_32_mul(
- dal_fixed31_32_add(
+ dc_fixpt_pow(
+ dc_fixpt_neg(arg),
+ dc_fixpt_recip(gamma))));
+ else if (dc_fixpt_le(a0, arg))
+ return dc_fixpt_sub(
+ dc_fixpt_mul(
+ dc_fixpt_add(
one,
a3),
- dal_fixed31_32_pow(
+ dc_fixpt_pow(
arg,
- dal_fixed31_32_recip(gamma))),
+ dc_fixpt_recip(gamma))),
a2);
else
- return dal_fixed31_32_mul(
+ return dc_fixpt_mul(
arg,
a1);
}
@@ -259,25 +259,25 @@ static struct fixed31_32 translate_to_linear_space(
{
struct fixed31_32 linear;
- a0 = dal_fixed31_32_mul(a0, a1);
- if (dal_fixed31_32_le(arg, dal_fixed31_32_neg(a0)))
+ a0 = dc_fixpt_mul(a0, a1);
+ if (dc_fixpt_le(arg, dc_fixpt_neg(a0)))
- linear = dal_fixed31_32_neg(
- dal_fixed31_32_pow(
- dal_fixed31_32_div(
- dal_fixed31_32_sub(a2, arg),
- dal_fixed31_32_add(
- dal_fixed31_32_one, a3)), gamma));
+ linear = dc_fixpt_neg(
+ dc_fixpt_pow(
+ dc_fixpt_div(
+ dc_fixpt_sub(a2, arg),
+ dc_fixpt_add(
+ dc_fixpt_one, a3)), gamma));
- else if (dal_fixed31_32_le(dal_fixed31_32_neg(a0), arg) &&
- dal_fixed31_32_le(arg, a0))
- linear = dal_fixed31_32_div(arg, a1);
+ else if (dc_fixpt_le(dc_fixpt_neg(a0), arg) &&
+ dc_fixpt_le(arg, a0))
+ linear = dc_fixpt_div(arg, a1);
else
- linear = dal_fixed31_32_pow(
- dal_fixed31_32_div(
- dal_fixed31_32_add(a2, arg),
- dal_fixed31_32_add(
- dal_fixed31_32_one, a3)), gamma);
+ linear = dc_fixpt_pow(
+ dc_fixpt_div(
+ dc_fixpt_add(a2, arg),
+ dc_fixpt_add(
+ dc_fixpt_one, a3)), gamma);
return linear;
}
@@ -352,8 +352,8 @@ static bool find_software_points(
right = axis_x[max_number - 1].b;
}
- if (dal_fixed31_32_le(left, hw_point) &&
- dal_fixed31_32_le(hw_point, right)) {
+ if (dc_fixpt_le(left, hw_point) &&
+ dc_fixpt_le(hw_point, right)) {
*index_to_start = i;
*index_left = i;
@@ -366,7 +366,7 @@ static bool find_software_points(
return true;
} else if ((i == *index_to_start) &&
- dal_fixed31_32_le(hw_point, left)) {
+ dc_fixpt_le(hw_point, left)) {
*index_to_start = i;
*index_left = i;
*index_right = i;
@@ -375,7 +375,7 @@ static bool find_software_points(
return true;
} else if ((i == max_number - 1) &&
- dal_fixed31_32_le(right, hw_point)) {
+ dc_fixpt_le(right, hw_point)) {
*index_to_start = i;
*index_left = i;
*index_right = i;
@@ -457,17 +457,17 @@ static bool build_custom_gamma_mapping_coefficients_worker(
}
if (hw_pos == HW_POINT_POSITION_MIDDLE)
- point->coeff = dal_fixed31_32_div(
- dal_fixed31_32_sub(
+ point->coeff = dc_fixpt_div(
+ dc_fixpt_sub(
coord_x,
left_pos),
- dal_fixed31_32_sub(
+ dc_fixpt_sub(
right_pos,
left_pos));
else if (hw_pos == HW_POINT_POSITION_LEFT)
- point->coeff = dal_fixed31_32_zero;
+ point->coeff = dc_fixpt_zero;
else if (hw_pos == HW_POINT_POSITION_RIGHT)
- point->coeff = dal_fixed31_32_from_int(2);
+ point->coeff = dc_fixpt_from_int(2);
else {
BREAK_TO_DEBUGGER();
return false;
@@ -502,45 +502,45 @@ static struct fixed31_32 calculate_mapped_value(
if ((point->left_index < 0) || (point->left_index > max_index)) {
BREAK_TO_DEBUGGER();
- return dal_fixed31_32_zero;
+ return dc_fixpt_zero;
}
if ((point->right_index < 0) || (point->right_index > max_index)) {
BREAK_TO_DEBUGGER();
- return dal_fixed31_32_zero;
+ return dc_fixpt_zero;
}
if (point->pos == HW_POINT_POSITION_MIDDLE)
if (channel == CHANNEL_NAME_RED)
- result = dal_fixed31_32_add(
- dal_fixed31_32_mul(
+ result = dc_fixpt_add(
+ dc_fixpt_mul(
point->coeff,
- dal_fixed31_32_sub(
+ dc_fixpt_sub(
rgb[point->right_index].r,
rgb[point->left_index].r)),
rgb[point->left_index].r);
else if (channel == CHANNEL_NAME_GREEN)
- result = dal_fixed31_32_add(
- dal_fixed31_32_mul(
+ result = dc_fixpt_add(
+ dc_fixpt_mul(
point->coeff,
- dal_fixed31_32_sub(
+ dc_fixpt_sub(
rgb[point->right_index].g,
rgb[point->left_index].g)),
rgb[point->left_index].g);
else
- result = dal_fixed31_32_add(
- dal_fixed31_32_mul(
+ result = dc_fixpt_add(
+ dc_fixpt_mul(
point->coeff,
- dal_fixed31_32_sub(
+ dc_fixpt_sub(
rgb[point->right_index].b,
rgb[point->left_index].b)),
rgb[point->left_index].b);
else if (point->pos == HW_POINT_POSITION_LEFT) {
BREAK_TO_DEBUGGER();
- result = dal_fixed31_32_zero;
+ result = dc_fixpt_zero;
} else {
BREAK_TO_DEBUGGER();
- result = dal_fixed31_32_one;
+ result = dc_fixpt_one;
}
return result;
@@ -558,7 +558,7 @@ static void build_pq(struct pwl_float_data_ex *rgb_regamma,
struct fixed31_32 x;
struct fixed31_32 output;
struct fixed31_32 scaling_factor =
- dal_fixed31_32_from_fraction(sdr_white_level, 10000);
+ dc_fixpt_from_fraction(sdr_white_level, 10000);
if (!pq_initialized && sdr_white_level == 80) {
precompute_pq();
@@ -579,15 +579,15 @@ static void build_pq(struct pwl_float_data_ex *rgb_regamma,
if (sdr_white_level == 80) {
output = pq_table[i];
} else {
- x = dal_fixed31_32_mul(coord_x->x, scaling_factor);
+ x = dc_fixpt_mul(coord_x->x, scaling_factor);
compute_pq(x, &output);
}
/* should really not happen? */
- if (dal_fixed31_32_lt(output, dal_fixed31_32_zero))
- output = dal_fixed31_32_zero;
- else if (dal_fixed31_32_lt(dal_fixed31_32_one, output))
- output = dal_fixed31_32_one;
+ if (dc_fixpt_lt(output, dc_fixpt_zero))
+ output = dc_fixpt_zero;
+ else if (dc_fixpt_lt(dc_fixpt_one, output))
+ output = dc_fixpt_one;
rgb->r = output;
rgb->g = output;
@@ -605,7 +605,7 @@ static void build_de_pq(struct pwl_float_data_ex *de_pq,
uint32_t i;
struct fixed31_32 output;
- struct fixed31_32 scaling_factor = dal_fixed31_32_from_int(125);
+ struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
if (!de_pq_initialized) {
precompute_de_pq();
@@ -616,9 +616,9 @@ static void build_de_pq(struct pwl_float_data_ex *de_pq,
for (i = 0; i <= hw_points_num; i++) {
output = de_pq_table[i];
/* should really not happen? */
- if (dal_fixed31_32_lt(output, dal_fixed31_32_zero))
- output = dal_fixed31_32_zero;
- else if (dal_fixed31_32_lt(scaling_factor, output))
+ if (dc_fixpt_lt(output, dc_fixpt_zero))
+ output = dc_fixpt_zero;
+ else if (dc_fixpt_lt(scaling_factor, output))
output = scaling_factor;
de_pq[i].r = output;
de_pq[i].g = output;
@@ -670,9 +670,9 @@ static void build_degamma(struct pwl_float_data_ex *curve,
end_index = begin_index + 12 * NUM_PTS_IN_REGION;
while (i != begin_index) {
- curve[i].r = dal_fixed31_32_zero;
- curve[i].g = dal_fixed31_32_zero;
- curve[i].b = dal_fixed31_32_zero;
+ curve[i].r = dc_fixpt_zero;
+ curve[i].g = dc_fixpt_zero;
+ curve[i].b = dc_fixpt_zero;
i++;
}
@@ -684,19 +684,19 @@ static void build_degamma(struct pwl_float_data_ex *curve,
i++;
}
while (i != hw_points_num + 1) {
- curve[i].r = dal_fixed31_32_one;
- curve[i].g = dal_fixed31_32_one;
- curve[i].b = dal_fixed31_32_one;
+ curve[i].r = dc_fixpt_one;
+ curve[i].g = dc_fixpt_one;
+ curve[i].b = dc_fixpt_one;
i++;
}
}
-static bool scale_gamma(struct pwl_float_data *pwl_rgb,
+static void scale_gamma(struct pwl_float_data *pwl_rgb,
const struct dc_gamma *ramp,
struct dividers dividers)
{
- const struct fixed31_32 max_driver = dal_fixed31_32_from_int(0xFFFF);
- const struct fixed31_32 max_os = dal_fixed31_32_from_int(0xFF00);
+ const struct fixed31_32 max_driver = dc_fixpt_from_int(0xFFFF);
+ const struct fixed31_32 max_os = dc_fixpt_from_int(0xFF00);
struct fixed31_32 scaler = max_os;
uint32_t i;
struct pwl_float_data *rgb = pwl_rgb;
@@ -705,9 +705,9 @@ static bool scale_gamma(struct pwl_float_data *pwl_rgb,
i = 0;
do {
- if (dal_fixed31_32_lt(max_os, ramp->entries.red[i]) ||
- dal_fixed31_32_lt(max_os, ramp->entries.green[i]) ||
- dal_fixed31_32_lt(max_os, ramp->entries.blue[i])) {
+ if (dc_fixpt_lt(max_os, ramp->entries.red[i]) ||
+ dc_fixpt_lt(max_os, ramp->entries.green[i]) ||
+ dc_fixpt_lt(max_os, ramp->entries.blue[i])) {
scaler = max_driver;
break;
}
@@ -717,109 +717,170 @@ static bool scale_gamma(struct pwl_float_data *pwl_rgb,
i = 0;
do {
- rgb->r = dal_fixed31_32_div(
+ rgb->r = dc_fixpt_div(
ramp->entries.red[i], scaler);
- rgb->g = dal_fixed31_32_div(
+ rgb->g = dc_fixpt_div(
ramp->entries.green[i], scaler);
- rgb->b = dal_fixed31_32_div(
+ rgb->b = dc_fixpt_div(
ramp->entries.blue[i], scaler);
++rgb;
++i;
} while (i != ramp->num_entries);
- rgb->r = dal_fixed31_32_mul(rgb_last->r,
+ rgb->r = dc_fixpt_mul(rgb_last->r,
dividers.divider1);
- rgb->g = dal_fixed31_32_mul(rgb_last->g,
+ rgb->g = dc_fixpt_mul(rgb_last->g,
dividers.divider1);
- rgb->b = dal_fixed31_32_mul(rgb_last->b,
+ rgb->b = dc_fixpt_mul(rgb_last->b,
dividers.divider1);
++rgb;
- rgb->r = dal_fixed31_32_mul(rgb_last->r,
+ rgb->r = dc_fixpt_mul(rgb_last->r,
dividers.divider2);
- rgb->g = dal_fixed31_32_mul(rgb_last->g,
+ rgb->g = dc_fixpt_mul(rgb_last->g,
dividers.divider2);
- rgb->b = dal_fixed31_32_mul(rgb_last->b,
+ rgb->b = dc_fixpt_mul(rgb_last->b,
dividers.divider2);
++rgb;
- rgb->r = dal_fixed31_32_mul(rgb_last->r,
+ rgb->r = dc_fixpt_mul(rgb_last->r,
dividers.divider3);
- rgb->g = dal_fixed31_32_mul(rgb_last->g,
+ rgb->g = dc_fixpt_mul(rgb_last->g,
dividers.divider3);
- rgb->b = dal_fixed31_32_mul(rgb_last->b,
+ rgb->b = dc_fixpt_mul(rgb_last->b,
dividers.divider3);
-
- return true;
}
-static bool scale_gamma_dx(struct pwl_float_data *pwl_rgb,
+static void scale_gamma_dx(struct pwl_float_data *pwl_rgb,
const struct dc_gamma *ramp,
struct dividers dividers)
{
uint32_t i;
- struct fixed31_32 min = dal_fixed31_32_zero;
- struct fixed31_32 max = dal_fixed31_32_one;
+ struct fixed31_32 min = dc_fixpt_zero;
+ struct fixed31_32 max = dc_fixpt_one;
- struct fixed31_32 delta = dal_fixed31_32_zero;
- struct fixed31_32 offset = dal_fixed31_32_zero;
+ struct fixed31_32 delta = dc_fixpt_zero;
+ struct fixed31_32 offset = dc_fixpt_zero;
for (i = 0 ; i < ramp->num_entries; i++) {
- if (dal_fixed31_32_lt(ramp->entries.red[i], min))
+ if (dc_fixpt_lt(ramp->entries.red[i], min))
min = ramp->entries.red[i];
- if (dal_fixed31_32_lt(ramp->entries.green[i], min))
+ if (dc_fixpt_lt(ramp->entries.green[i], min))
min = ramp->entries.green[i];
- if (dal_fixed31_32_lt(ramp->entries.blue[i], min))
+ if (dc_fixpt_lt(ramp->entries.blue[i], min))
min = ramp->entries.blue[i];
- if (dal_fixed31_32_lt(max, ramp->entries.red[i]))
+ if (dc_fixpt_lt(max, ramp->entries.red[i]))
max = ramp->entries.red[i];
- if (dal_fixed31_32_lt(max, ramp->entries.green[i]))
+ if (dc_fixpt_lt(max, ramp->entries.green[i]))
max = ramp->entries.green[i];
- if (dal_fixed31_32_lt(max, ramp->entries.blue[i]))
+ if (dc_fixpt_lt(max, ramp->entries.blue[i]))
max = ramp->entries.blue[i];
}
- if (dal_fixed31_32_lt(min, dal_fixed31_32_zero))
- delta = dal_fixed31_32_neg(min);
+ if (dc_fixpt_lt(min, dc_fixpt_zero))
+ delta = dc_fixpt_neg(min);
- offset = dal_fixed31_32_add(min, max);
+ offset = dc_fixpt_add(min, max);
for (i = 0 ; i < ramp->num_entries; i++) {
- pwl_rgb[i].r = dal_fixed31_32_div(
- dal_fixed31_32_add(
+ pwl_rgb[i].r = dc_fixpt_div(
+ dc_fixpt_add(
ramp->entries.red[i], delta), offset);
- pwl_rgb[i].g = dal_fixed31_32_div(
- dal_fixed31_32_add(
+ pwl_rgb[i].g = dc_fixpt_div(
+ dc_fixpt_add(
ramp->entries.green[i], delta), offset);
- pwl_rgb[i].b = dal_fixed31_32_div(
- dal_fixed31_32_add(
+ pwl_rgb[i].b = dc_fixpt_div(
+ dc_fixpt_add(
ramp->entries.blue[i], delta), offset);
}
- pwl_rgb[i].r = dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+ pwl_rgb[i].r = dc_fixpt_sub(dc_fixpt_mul_int(
pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
- pwl_rgb[i].g = dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+ pwl_rgb[i].g = dc_fixpt_sub(dc_fixpt_mul_int(
pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
- pwl_rgb[i].b = dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+ pwl_rgb[i].b = dc_fixpt_sub(dc_fixpt_mul_int(
pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
++i;
- pwl_rgb[i].r = dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+ pwl_rgb[i].r = dc_fixpt_sub(dc_fixpt_mul_int(
pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
- pwl_rgb[i].g = dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+ pwl_rgb[i].g = dc_fixpt_sub(dc_fixpt_mul_int(
pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
- pwl_rgb[i].b = dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+ pwl_rgb[i].b = dc_fixpt_sub(dc_fixpt_mul_int(
pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
+}
- return true;
+/* todo: all these scale_gamma functions are inherently the same but
+ * take different structures as params or different format for ramp
+ * values. We could probably implement it in a more generic fashion
+ */
+static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb,
+ const struct regamma_ramp *ramp,
+ struct dividers dividers)
+{
+ unsigned short max_driver = 0xFFFF;
+ unsigned short max_os = 0xFF00;
+ unsigned short scaler = max_os;
+ uint32_t i;
+ struct pwl_float_data *rgb = pwl_rgb;
+ struct pwl_float_data *rgb_last = rgb + GAMMA_RGB_256_ENTRIES - 1;
+
+ i = 0;
+ do {
+ if (ramp->gamma[i] > max_os ||
+ ramp->gamma[i + 256] > max_os ||
+ ramp->gamma[i + 512] > max_os) {
+ scaler = max_driver;
+ break;
+ }
+ i++;
+ } while (i != GAMMA_RGB_256_ENTRIES);
+
+ i = 0;
+ do {
+ rgb->r = dc_fixpt_from_fraction(
+ ramp->gamma[i], scaler);
+ rgb->g = dc_fixpt_from_fraction(
+ ramp->gamma[i + 256], scaler);
+ rgb->b = dc_fixpt_from_fraction(
+ ramp->gamma[i + 512], scaler);
+
+ ++rgb;
+ ++i;
+ } while (i != GAMMA_RGB_256_ENTRIES);
+
+ rgb->r = dc_fixpt_mul(rgb_last->r,
+ dividers.divider1);
+ rgb->g = dc_fixpt_mul(rgb_last->g,
+ dividers.divider1);
+ rgb->b = dc_fixpt_mul(rgb_last->b,
+ dividers.divider1);
+
+ ++rgb;
+
+ rgb->r = dc_fixpt_mul(rgb_last->r,
+ dividers.divider2);
+ rgb->g = dc_fixpt_mul(rgb_last->g,
+ dividers.divider2);
+ rgb->b = dc_fixpt_mul(rgb_last->b,
+ dividers.divider2);
+
+ ++rgb;
+
+ rgb->r = dc_fixpt_mul(rgb_last->r,
+ dividers.divider3);
+ rgb->g = dc_fixpt_mul(rgb_last->g,
+ dividers.divider3);
+ rgb->b = dc_fixpt_mul(rgb_last->b,
+ dividers.divider3);
}
/*
@@ -852,7 +913,7 @@ static void apply_lut_1d(
struct fixed31_32 lut2;
const int max_lut_index = 4095;
const struct fixed31_32 max_lut_index_f =
- dal_fixed31_32_from_int_nonconst(max_lut_index);
+ dc_fixpt_from_int(max_lut_index);
int32_t index = 0, index_next = 0;
struct fixed31_32 index_f;
struct fixed31_32 delta_lut;
@@ -870,10 +931,10 @@ static void apply_lut_1d(
else
regamma_y = &tf_pts->blue[i];
- norm_y = dal_fixed31_32_mul(max_lut_index_f,
+ norm_y = dc_fixpt_mul(max_lut_index_f,
*regamma_y);
- index = dal_fixed31_32_floor(norm_y);
- index_f = dal_fixed31_32_from_int_nonconst(index);
+ index = dc_fixpt_floor(norm_y);
+ index_f = dc_fixpt_from_int(index);
if (index < 0 || index > max_lut_index)
continue;
@@ -892,11 +953,11 @@ static void apply_lut_1d(
}
// we have everything now, so interpolate
- delta_lut = dal_fixed31_32_sub(lut2, lut1);
- delta_index = dal_fixed31_32_sub(norm_y, index_f);
+ delta_lut = dc_fixpt_sub(lut2, lut1);
+ delta_index = dc_fixpt_sub(norm_y, index_f);
- *regamma_y = dal_fixed31_32_add(lut1,
- dal_fixed31_32_mul(delta_index, delta_lut));
+ *regamma_y = dc_fixpt_add(lut1,
+ dc_fixpt_mul(delta_index, delta_lut));
}
}
}
@@ -912,7 +973,7 @@ static void build_evenly_distributed_points(
uint32_t i = 0;
do {
- struct fixed31_32 value = dal_fixed31_32_from_fraction(i,
+ struct fixed31_32 value = dc_fixpt_from_fraction(i,
numberof_points - 1);
p->r = value;
@@ -923,21 +984,21 @@ static void build_evenly_distributed_points(
++i;
} while (i != numberof_points);
- p->r = dal_fixed31_32_div(p_last->r, dividers.divider1);
- p->g = dal_fixed31_32_div(p_last->g, dividers.divider1);
- p->b = dal_fixed31_32_div(p_last->b, dividers.divider1);
+ p->r = dc_fixpt_div(p_last->r, dividers.divider1);
+ p->g = dc_fixpt_div(p_last->g, dividers.divider1);
+ p->b = dc_fixpt_div(p_last->b, dividers.divider1);
++p;
- p->r = dal_fixed31_32_div(p_last->r, dividers.divider2);
- p->g = dal_fixed31_32_div(p_last->g, dividers.divider2);
- p->b = dal_fixed31_32_div(p_last->b, dividers.divider2);
+ p->r = dc_fixpt_div(p_last->r, dividers.divider2);
+ p->g = dc_fixpt_div(p_last->g, dividers.divider2);
+ p->b = dc_fixpt_div(p_last->b, dividers.divider2);
++p;
- p->r = dal_fixed31_32_div(p_last->r, dividers.divider3);
- p->g = dal_fixed31_32_div(p_last->g, dividers.divider3);
- p->b = dal_fixed31_32_div(p_last->b, dividers.divider3);
+ p->r = dc_fixpt_div(p_last->r, dividers.divider3);
+ p->g = dc_fixpt_div(p_last->g, dividers.divider3);
+ p->b = dc_fixpt_div(p_last->b, dividers.divider3);
}
static inline void copy_rgb_regamma_to_coordinates_x(
@@ -949,7 +1010,7 @@ static inline void copy_rgb_regamma_to_coordinates_x(
uint32_t i = 0;
const struct pwl_float_data_ex *rgb_regamma = rgb_ex;
- while (i <= hw_points_num) {
+ while (i <= hw_points_num + 1) {
coords->regamma_y_red = rgb_regamma->r;
coords->regamma_y_green = rgb_regamma->g;
coords->regamma_y_blue = rgb_regamma->b;
@@ -1002,6 +1063,102 @@ static bool calculate_interpolated_hardware_curve(
return true;
}
+/* The "old" interpolation uses a complicated scheme to build an array of
+ * coefficients while also using an array of 0-255 normalized to 0-1
+ * Then there's another loop using both of the above + new scaled user ramp
+ * and we concatenate them. It also searches for points of interpolation and
+ * uses enums for positions.
+ *
+ * This function uses a different approach:
+ * user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255
+ * To find index for hwX , we notice the following:
+ * i/255 <= hwX < (i+1)/255 <=> i <= 255*hwX < i+1
+ * See apply_lut_1d which is the same principle, but on 4K entry 1D LUT
+ *
+ * Once the index is known, combined Y is simply:
+ * user_ramp(index) + (hwX-index/255)*(user_ramp(index+1) - user_ramp(index)
+ *
+ * We should switch to this method in all cases, it's simpler and faster
+ * ToDo one day - for now this only applies to ADL regamma to avoid regression
+ * for regular use cases (sRGB and PQ)
+ */
+static void interpolate_user_regamma(uint32_t hw_points_num,
+ struct pwl_float_data *rgb_user,
+ bool apply_degamma,
+ struct dc_transfer_func_distributed_points *tf_pts)
+{
+ uint32_t i;
+ uint32_t color = 0;
+ int32_t index;
+ int32_t index_next;
+ struct fixed31_32 *tf_point;
+ struct fixed31_32 hw_x;
+ struct fixed31_32 norm_factor =
+ dc_fixpt_from_int(255);
+ struct fixed31_32 norm_x;
+ struct fixed31_32 index_f;
+ struct fixed31_32 lut1;
+ struct fixed31_32 lut2;
+ struct fixed31_32 delta_lut;
+ struct fixed31_32 delta_index;
+
+ i = 0;
+ /* fixed_pt library has problems handling too small values */
+ while (i != 32) {
+ tf_pts->red[i] = dc_fixpt_zero;
+ tf_pts->green[i] = dc_fixpt_zero;
+ tf_pts->blue[i] = dc_fixpt_zero;
+ ++i;
+ }
+ while (i <= hw_points_num + 1) {
+ for (color = 0; color < 3; color++) {
+ if (color == 0)
+ tf_point = &tf_pts->red[i];
+ else if (color == 1)
+ tf_point = &tf_pts->green[i];
+ else
+ tf_point = &tf_pts->blue[i];
+
+ if (apply_degamma) {
+ if (color == 0)
+ hw_x = coordinates_x[i].regamma_y_red;
+ else if (color == 1)
+ hw_x = coordinates_x[i].regamma_y_green;
+ else
+ hw_x = coordinates_x[i].regamma_y_blue;
+ } else
+ hw_x = coordinates_x[i].x;
+
+ norm_x = dc_fixpt_mul(norm_factor, hw_x);
+ index = dc_fixpt_floor(norm_x);
+ if (index < 0 || index > 255)
+ continue;
+
+ index_f = dc_fixpt_from_int(index);
+ index_next = (index == 255) ? index : index + 1;
+
+ if (color == 0) {
+ lut1 = rgb_user[index].r;
+ lut2 = rgb_user[index_next].r;
+ } else if (color == 1) {
+ lut1 = rgb_user[index].g;
+ lut2 = rgb_user[index_next].g;
+ } else {
+ lut1 = rgb_user[index].b;
+ lut2 = rgb_user[index_next].b;
+ }
+
+ // we have everything now, so interpolate
+ delta_lut = dc_fixpt_sub(lut2, lut1);
+ delta_index = dc_fixpt_sub(norm_x, index_f);
+
+ *tf_point = dc_fixpt_add(lut1,
+ dc_fixpt_mul(delta_index, delta_lut));
+ }
+ ++i;
+ }
+}
+
static void build_new_custom_resulted_curve(
uint32_t hw_points_num,
struct dc_transfer_func_distributed_points *tf_pts)
@@ -1011,16 +1168,39 @@ static void build_new_custom_resulted_curve(
i = 0;
while (i != hw_points_num + 1) {
- tf_pts->red[i] = dal_fixed31_32_clamp(
- tf_pts->red[i], dal_fixed31_32_zero,
- dal_fixed31_32_one);
- tf_pts->green[i] = dal_fixed31_32_clamp(
- tf_pts->green[i], dal_fixed31_32_zero,
- dal_fixed31_32_one);
- tf_pts->blue[i] = dal_fixed31_32_clamp(
- tf_pts->blue[i], dal_fixed31_32_zero,
- dal_fixed31_32_one);
+ tf_pts->red[i] = dc_fixpt_clamp(
+ tf_pts->red[i], dc_fixpt_zero,
+ dc_fixpt_one);
+ tf_pts->green[i] = dc_fixpt_clamp(
+ tf_pts->green[i], dc_fixpt_zero,
+ dc_fixpt_one);
+ tf_pts->blue[i] = dc_fixpt_clamp(
+ tf_pts->blue[i], dc_fixpt_zero,
+ dc_fixpt_one);
+
+ ++i;
+ }
+}
+
+static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma,
+ uint32_t hw_points_num)
+{
+ uint32_t i;
+
+ struct gamma_coefficients coeff;
+ struct pwl_float_data_ex *rgb = rgb_regamma;
+ const struct hw_x_point *coord_x = coordinates_x;
+
+ build_coefficients(&coeff, true);
+ i = 0;
+ while (i != hw_points_num + 1) {
+ rgb->r = translate_from_linear_space_ex(
+ coord_x->x, &coeff, 0);
+ rgb->g = rgb->r;
+ rgb->b = rgb->r;
+ ++coord_x;
+ ++rgb;
++i;
}
}
@@ -1062,6 +1242,7 @@ static bool map_regamma_hw_to_x_user(
}
}
+ /* this should be named differently, all it does is clamp to 0-1 */
build_new_custom_resulted_curve(hw_points_num, tf_pts);
return true;
@@ -1109,9 +1290,9 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
if (!coeff)
goto coeff_alloc_fail;
- dividers.divider1 = dal_fixed31_32_from_fraction(3, 2);
- dividers.divider2 = dal_fixed31_32_from_int(2);
- dividers.divider3 = dal_fixed31_32_from_fraction(5, 2);
+ dividers.divider1 = dc_fixpt_from_fraction(3, 2);
+ dividers.divider2 = dc_fixpt_from_int(2);
+ dividers.divider3 = dc_fixpt_from_fraction(5, 2);
tf = output_tf->tf;
@@ -1168,6 +1349,113 @@ rgb_user_alloc_fail:
return ret;
}
+bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
+ const struct regamma_lut *regamma)
+{
+ struct gamma_coefficients coeff;
+ const struct hw_x_point *coord_x = coordinates_x;
+ uint32_t i = 0;
+
+ do {
+ coeff.a0[i] = dc_fixpt_from_fraction(
+ regamma->coeff.A0[i], 10000000);
+ coeff.a1[i] = dc_fixpt_from_fraction(
+ regamma->coeff.A1[i], 1000);
+ coeff.a2[i] = dc_fixpt_from_fraction(
+ regamma->coeff.A2[i], 1000);
+ coeff.a3[i] = dc_fixpt_from_fraction(
+ regamma->coeff.A3[i], 1000);
+ coeff.user_gamma[i] = dc_fixpt_from_fraction(
+ regamma->coeff.gamma[i], 1000);
+
+ ++i;
+ } while (i != 3);
+
+ i = 0;
+ /* fixed_pt library has problems handling too small values */
+ while (i != 32) {
+ output_tf->tf_pts.red[i] = dc_fixpt_zero;
+ output_tf->tf_pts.green[i] = dc_fixpt_zero;
+ output_tf->tf_pts.blue[i] = dc_fixpt_zero;
+ ++coord_x;
+ ++i;
+ }
+ while (i != MAX_HW_POINTS + 1) {
+ output_tf->tf_pts.red[i] = translate_from_linear_space_ex(
+ coord_x->x, &coeff, 0);
+ output_tf->tf_pts.green[i] = translate_from_linear_space_ex(
+ coord_x->x, &coeff, 1);
+ output_tf->tf_pts.blue[i] = translate_from_linear_space_ex(
+ coord_x->x, &coeff, 2);
+ ++coord_x;
+ ++i;
+ }
+
+ // this function just clamps output to 0-1
+ build_new_custom_resulted_curve(MAX_HW_POINTS, &output_tf->tf_pts);
+ output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+
+ return true;
+}
+
+bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
+ const struct regamma_lut *regamma)
+{
+ struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
+ struct dividers dividers;
+
+ struct pwl_float_data *rgb_user = NULL;
+ struct pwl_float_data_ex *rgb_regamma = NULL;
+ bool ret = false;
+
+ if (regamma == NULL)
+ return false;
+
+ output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+
+ rgb_user = kzalloc(sizeof(*rgb_user) * (GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS),
+ GFP_KERNEL);
+ if (!rgb_user)
+ goto rgb_user_alloc_fail;
+
+ rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS),
+ GFP_KERNEL);
+ if (!rgb_regamma)
+ goto rgb_regamma_alloc_fail;
+
+ dividers.divider1 = dc_fixpt_from_fraction(3, 2);
+ dividers.divider2 = dc_fixpt_from_int(2);
+ dividers.divider3 = dc_fixpt_from_fraction(5, 2);
+
+ scale_user_regamma_ramp(rgb_user, &regamma->ramp, dividers);
+
+ if (regamma->flags.bits.applyDegamma == 1) {
+ apply_degamma_for_user_regamma(rgb_regamma, MAX_HW_POINTS);
+ copy_rgb_regamma_to_coordinates_x(coordinates_x,
+ MAX_HW_POINTS, rgb_regamma);
+ }
+
+ interpolate_user_regamma(MAX_HW_POINTS, rgb_user,
+ regamma->flags.bits.applyDegamma, tf_pts);
+
+ // no custom HDR curves!
+ tf_pts->end_exponent = 0;
+ tf_pts->x_point_at_y1_red = 1;
+ tf_pts->x_point_at_y1_green = 1;
+ tf_pts->x_point_at_y1_blue = 1;
+
+ // this function just clamps output to 0-1
+ build_new_custom_resulted_curve(MAX_HW_POINTS, tf_pts);
+
+ ret = true;
+
+ kfree(rgb_regamma);
+rgb_regamma_alloc_fail:
+ kvfree(rgb_user);
+rgb_user_alloc_fail:
+ return ret;
+}
+
bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
const struct dc_gamma *ramp, bool mapUserRamp)
{
@@ -1208,9 +1496,9 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
if (!coeff)
goto coeff_alloc_fail;
- dividers.divider1 = dal_fixed31_32_from_fraction(3, 2);
- dividers.divider2 = dal_fixed31_32_from_int(2);
- dividers.divider3 = dal_fixed31_32_from_fraction(5, 2);
+ dividers.divider1 = dc_fixpt_from_fraction(3, 2);
+ dividers.divider2 = dc_fixpt_from_int(2);
+ dividers.divider3 = dc_fixpt_from_fraction(5, 2);
tf = input_tf->tf;
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
index b7f9bc27d101..b64048991a95 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
@@ -32,6 +32,47 @@ struct dc_transfer_func_distributed_points;
struct dc_rgb_fixed;
enum dc_transfer_func_predefined;
+/* For SetRegamma ADL interface support
+ * Must match escape type
+ */
+union regamma_flags {
+ unsigned int raw;
+ struct {
+ unsigned int gammaRampArray :1; // RegammaRamp is in use
+ unsigned int gammaFromEdid :1; //gamma from edid is in use
+ unsigned int gammaFromEdidEx :1; //gamma from edid is in use , but only for Display Id 1.2
+ unsigned int gammaFromUser :1; //user custom gamma is used
+ unsigned int coeffFromUser :1; //coeff. A0-A3 from user is in use
+ unsigned int coeffFromEdid :1; //coeff. A0-A3 from edid is in use
+ unsigned int applyDegamma :1; //flag for additional degamma correction in driver
+ unsigned int gammaPredefinedSRGB :1; //flag for SRGB gamma
+ unsigned int gammaPredefinedPQ :1; //flag for PQ gamma
+ unsigned int gammaPredefinedPQ2084Interim :1; //flag for PQ gamma, lower max nits
+ unsigned int gammaPredefined36 :1; //flag for 3.6 gamma
+ unsigned int gammaPredefinedReset :1; //flag to return to previous gamma
+ } bits;
+};
+
+struct regamma_ramp {
+ unsigned short gamma[256*3]; // gamma ramp packed in same way as OS windows ,r , g & b
+};
+
+struct regamma_coeff {
+ int gamma[3];
+ int A0[3];
+ int A1[3];
+ int A2[3];
+ int A3[3];
+};
+
+struct regamma_lut {
+ union regamma_flags flags;
+ union {
+ struct regamma_ramp ramp;
+ struct regamma_coeff coeff;
+ };
+};
+
void setup_x_points_distribution(void);
void precompute_pq(void);
void precompute_de_pq(void);
@@ -45,9 +86,14 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *output_tf,
bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans,
struct dc_transfer_func_distributed_points *points);
-bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
+bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
struct dc_transfer_func_distributed_points *points);
+bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
+ const struct regamma_lut *regamma);
+
+bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
+ const struct regamma_lut *regamma);
#endif /* COLOR_MOD_COLOR_GAMMA_H_ */
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h
index 3230e2adb870..3812094b52e8 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h
@@ -46,6 +46,10 @@ void mod_stats_dump(struct mod_stats *mod_stats);
void mod_stats_reset_data(struct mod_stats *mod_stats);
+void mod_stats_update_event(struct mod_stats *mod_stats,
+ char *event_string,
+ unsigned int length);
+
void mod_stats_update_flip(struct mod_stats *mod_stats,
unsigned long timestamp_in_ns);
diff --git a/drivers/gpu/drm/amd/display/modules/stats/stats.c b/drivers/gpu/drm/amd/display/modules/stats/stats.c
index 041f87b73d5f..3f7d47fdc367 100644
--- a/drivers/gpu/drm/amd/display/modules/stats/stats.c
+++ b/drivers/gpu/drm/amd/display/modules/stats/stats.c
@@ -36,9 +36,14 @@
#define DAL_STATS_ENTRIES_REGKEY_DEFAULT 0x00350000
#define DAL_STATS_ENTRIES_REGKEY_MAX 0x01000000
+#define DAL_STATS_EVENT_ENTRIES_DEFAULT 0x00000100
+
#define MOD_STATS_NUM_VSYNCS 5
+#define MOD_STATS_EVENT_STRING_MAX 512
struct stats_time_cache {
+ unsigned int entry_id;
+
unsigned long flip_timestamp_in_ns;
unsigned long vupdate_timestamp_in_ns;
@@ -63,15 +68,26 @@ struct stats_time_cache {
unsigned int flags;
};
+struct stats_event_cache {
+ unsigned int entry_id;
+ char event_string[MOD_STATS_EVENT_STRING_MAX];
+};
+
struct core_stats {
struct mod_stats public;
struct dc *dc;
+ bool enabled;
+ unsigned int entries;
+ unsigned int event_entries;
+ unsigned int entry_id;
+
struct stats_time_cache *time;
unsigned int index;
- bool enabled;
- unsigned int entries;
+ struct stats_event_cache *events;
+ unsigned int event_index;
+
};
#define MOD_STATS_TO_CORE(mod_stats)\
@@ -99,12 +115,12 @@ struct mod_stats *mod_stats_create(struct dc *dc)
unsigned int reg_data;
int i = 0;
+ if (dc == NULL)
+ goto fail_construct;
+
core_stats = kzalloc(sizeof(struct core_stats), GFP_KERNEL);
if (core_stats == NULL)
- goto fail_alloc_context;
-
- if (dc == NULL)
goto fail_construct;
core_stats->dc = dc;
@@ -115,33 +131,55 @@ struct mod_stats *mod_stats_create(struct dc *dc)
&reg_data, sizeof(unsigned int), &flag))
core_stats->enabled = reg_data;
- core_stats->entries = DAL_STATS_ENTRIES_REGKEY_DEFAULT;
- if (dm_read_persistent_data(dc->ctx, NULL, NULL,
- DAL_STATS_ENTRIES_REGKEY,
- &reg_data, sizeof(unsigned int), &flag)) {
- if (reg_data > DAL_STATS_ENTRIES_REGKEY_MAX)
- core_stats->entries = DAL_STATS_ENTRIES_REGKEY_MAX;
- else
- core_stats->entries = reg_data;
- }
+ if (core_stats->enabled) {
+ core_stats->entries = DAL_STATS_ENTRIES_REGKEY_DEFAULT;
+ if (dm_read_persistent_data(dc->ctx, NULL, NULL,
+ DAL_STATS_ENTRIES_REGKEY,
+ &reg_data, sizeof(unsigned int), &flag)) {
+ if (reg_data > DAL_STATS_ENTRIES_REGKEY_MAX)
+ core_stats->entries = DAL_STATS_ENTRIES_REGKEY_MAX;
+ else
+ core_stats->entries = reg_data;
+ }
+ core_stats->time = kzalloc(
+ sizeof(struct stats_time_cache) *
+ core_stats->entries,
+ GFP_KERNEL);
- core_stats->time = kzalloc(sizeof(struct stats_time_cache) * core_stats->entries,
- GFP_KERNEL);
+ if (core_stats->time == NULL)
+ goto fail_construct_time;
- if (core_stats->time == NULL)
- goto fail_construct;
+ core_stats->event_entries = DAL_STATS_EVENT_ENTRIES_DEFAULT;
+ core_stats->events = kzalloc(
+ sizeof(struct stats_event_cache) *
+ core_stats->event_entries,
+ GFP_KERNEL);
+
+ if (core_stats->events == NULL)
+ goto fail_construct_events;
+
+ } else {
+ core_stats->entries = 0;
+ }
/* Purposely leave index 0 unused so we don't need special logic to
* handle calculation cases that depend on previous flip data.
*/
core_stats->index = 1;
+ core_stats->event_index = 0;
+
+ // Keeps track of ordering within the different stats structures
+ core_stats->entry_id = 0;
return &core_stats->public;
-fail_construct:
+fail_construct_events:
+ kfree(core_stats->time);
+
+fail_construct_time:
kfree(core_stats);
-fail_alloc_context:
+fail_construct:
return NULL;
}
@@ -153,6 +191,9 @@ void mod_stats_destroy(struct mod_stats *mod_stats)
if (core_stats->time != NULL)
kfree(core_stats->time);
+ if (core_stats->events != NULL)
+ kfree(core_stats->events);
+
kfree(core_stats);
}
}
@@ -163,7 +204,11 @@ void mod_stats_dump(struct mod_stats *mod_stats)
struct dal_logger *logger = NULL;
struct core_stats *core_stats = NULL;
struct stats_time_cache *time = NULL;
+ struct stats_event_cache *events = NULL;
+ unsigned int time_index = 1;
+ unsigned int event_index = 0;
unsigned int index = 0;
+ struct log_entry log_entry;
if (mod_stats == NULL)
return;
@@ -172,45 +217,62 @@ void mod_stats_dump(struct mod_stats *mod_stats)
dc = core_stats->dc;
logger = dc->ctx->logger;
time = core_stats->time;
-
- //LogEntry* pLog = GetLog()->Open(LogMajor_ISR, LogMinor_ISR_FreeSyncSW);
-
- //if (!pLog->IsDummyEntry())
- {
- dm_logger_write(logger, LOG_PROFILING, "==Display Caps==\n");
- dm_logger_write(logger, LOG_PROFILING, "\n");
- dm_logger_write(logger, LOG_PROFILING, "\n");
-
- dm_logger_write(logger, LOG_PROFILING, "==Stats==\n");
- dm_logger_write(logger, LOG_PROFILING,
- "render avgRender minWindow midPoint maxWindow vsyncToFlip flipToVsync #vsyncBetweenFlip #frame insertDuration vTotalMin vTotalMax eventTrigs vSyncTime1 vSyncTime2 vSyncTime3 vSyncTime4 vSyncTime5 flags\n");
-
- for (int i = 0; i < core_stats->index && i < core_stats->entries; i++) {
- dm_logger_write(logger, LOG_PROFILING,
- "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u\n",
- time[i].render_time_in_us,
- time[i].avg_render_time_in_us_last_ten,
- time[i].min_window,
- time[i].lfc_mid_point_in_us,
- time[i].max_window,
- time[i].vsync_to_flip_time_in_us,
- time[i].flip_to_vsync_time_in_us,
- time[i].num_vsync_between_flips,
- time[i].num_frames_inserted,
- time[i].inserted_duration_in_us,
- time[i].v_total_min,
- time[i].v_total_max,
- time[i].event_triggers,
- time[i].v_sync_time_in_us[0],
- time[i].v_sync_time_in_us[1],
- time[i].v_sync_time_in_us[2],
- time[i].v_sync_time_in_us[3],
- time[i].v_sync_time_in_us[4],
- time[i].flags);
+ events = core_stats->events;
+
+ DISPLAY_STATS_BEGIN(log_entry);
+
+ DISPLAY_STATS("==Display Caps==\n");
+
+ DISPLAY_STATS("==Display Stats==\n");
+
+ DISPLAY_STATS("%10s %10s %10s %10s %10s"
+ " %11s %11s %17s %10s %14s"
+ " %10s %10s %10s %10s %10s"
+ " %10s %10s %10s %10s\n",
+ "render", "avgRender",
+ "minWindow", "midPoint", "maxWindow",
+ "vsyncToFlip", "flipToVsync", "vsyncsBetweenFlip",
+ "numFrame", "insertDuration",
+ "vTotalMin", "vTotalMax", "eventTrigs",
+ "vSyncTime1", "vSyncTime2", "vSyncTime3",
+ "vSyncTime4", "vSyncTime5", "flags");
+
+ for (int i = 0; i < core_stats->entry_id; i++) {
+ if (event_index < core_stats->event_index &&
+ i == events[event_index].entry_id) {
+ DISPLAY_STATS("%s\n", events[event_index].event_string);
+ event_index++;
+ } else if (time_index < core_stats->index &&
+ i == time[time_index].entry_id) {
+ DISPLAY_STATS("%10u %10u %10u %10u %10u"
+ " %11u %11u %17u %10u %14u"
+ " %10u %10u %10u %10u %10u"
+ " %10u %10u %10u %10u\n",
+ time[time_index].render_time_in_us,
+ time[time_index].avg_render_time_in_us_last_ten,
+ time[time_index].min_window,
+ time[time_index].lfc_mid_point_in_us,
+ time[time_index].max_window,
+ time[time_index].vsync_to_flip_time_in_us,
+ time[time_index].flip_to_vsync_time_in_us,
+ time[time_index].num_vsync_between_flips,
+ time[time_index].num_frames_inserted,
+ time[time_index].inserted_duration_in_us,
+ time[time_index].v_total_min,
+ time[time_index].v_total_max,
+ time[time_index].event_triggers,
+ time[time_index].v_sync_time_in_us[0],
+ time[time_index].v_sync_time_in_us[1],
+ time[time_index].v_sync_time_in_us[2],
+ time[time_index].v_sync_time_in_us[3],
+ time[time_index].v_sync_time_in_us[4],
+ time[time_index].flags);
+
+ time_index++;
}
}
- //GetLog()->Close(pLog);
- //GetLog()->UnSetLogMask(LogMajor_ISR, LogMinor_ISR_FreeSyncSW);
+
+ DISPLAY_STATS_END(log_entry);
}
void mod_stats_reset_data(struct mod_stats *mod_stats)
@@ -227,7 +289,46 @@ void mod_stats_reset_data(struct mod_stats *mod_stats)
memset(core_stats->time, 0,
sizeof(struct stats_time_cache) * core_stats->entries);
- core_stats->index = 0;
+ memset(core_stats->events, 0,
+ sizeof(struct stats_event_cache) * core_stats->event_entries);
+
+ core_stats->index = 1;
+ core_stats->event_index = 0;
+
+ // Keeps track of ordering within the different stats structures
+ core_stats->entry_id = 0;
+}
+
+void mod_stats_update_event(struct mod_stats *mod_stats,
+ char *event_string,
+ unsigned int length)
+{
+ struct core_stats *core_stats = NULL;
+ struct stats_event_cache *events = NULL;
+ unsigned int index = 0;
+ unsigned int copy_length = 0;
+
+ if (mod_stats == NULL)
+ return;
+
+ core_stats = MOD_STATS_TO_CORE(mod_stats);
+
+ if (core_stats->event_index >= core_stats->event_entries)
+ return;
+
+ events = core_stats->events;
+ index = core_stats->event_index;
+
+ copy_length = length;
+ if (length > MOD_STATS_EVENT_STRING_MAX)
+ copy_length = MOD_STATS_EVENT_STRING_MAX;
+
+ memcpy(&events[index].event_string, event_string, copy_length);
+ events[index].event_string[copy_length - 1] = '\0';
+
+ events[index].entry_id = core_stats->entry_id;
+ core_stats->event_index++;
+ core_stats->entry_id++;
}
void mod_stats_update_flip(struct mod_stats *mod_stats,
@@ -250,7 +351,7 @@ void mod_stats_update_flip(struct mod_stats *mod_stats,
time[index].flip_timestamp_in_ns = timestamp_in_ns;
time[index].render_time_in_us =
- timestamp_in_ns - time[index - 1].flip_timestamp_in_ns;
+ (timestamp_in_ns - time[index - 1].flip_timestamp_in_ns) / 1000;
if (index >= 10) {
for (unsigned int i = 0; i < 10; i++)
@@ -261,12 +362,16 @@ void mod_stats_update_flip(struct mod_stats *mod_stats,
if (time[index].num_vsync_between_flips > 0)
time[index].vsync_to_flip_time_in_us =
- timestamp_in_ns - time[index].vupdate_timestamp_in_ns;
+ (timestamp_in_ns -
+ time[index].vupdate_timestamp_in_ns) / 1000;
else
time[index].vsync_to_flip_time_in_us =
- timestamp_in_ns - time[index - 1].vupdate_timestamp_in_ns;
+ (timestamp_in_ns -
+ time[index - 1].vupdate_timestamp_in_ns) / 1000;
+ time[index].entry_id = core_stats->entry_id;
core_stats->index++;
+ core_stats->entry_id++;
}
void mod_stats_update_vupdate(struct mod_stats *mod_stats,
@@ -275,6 +380,8 @@ void mod_stats_update_vupdate(struct mod_stats *mod_stats,
struct core_stats *core_stats = NULL;
struct stats_time_cache *time = NULL;
unsigned int index = 0;
+ unsigned int num_vsyncs = 0;
+ unsigned int prev_vsync_in_ns = 0;
if (mod_stats == NULL)
return;
@@ -286,14 +393,27 @@ void mod_stats_update_vupdate(struct mod_stats *mod_stats,
time = core_stats->time;
index = core_stats->index;
+ num_vsyncs = time[index].num_vsync_between_flips;
+
+ if (num_vsyncs < MOD_STATS_NUM_VSYNCS) {
+ if (num_vsyncs == 0) {
+ prev_vsync_in_ns =
+ time[index - 1].vupdate_timestamp_in_ns;
+
+ time[index].flip_to_vsync_time_in_us =
+ (timestamp_in_ns -
+ time[index - 1].flip_timestamp_in_ns) /
+ 1000;
+ } else {
+ prev_vsync_in_ns =
+ time[index].vupdate_timestamp_in_ns;
+ }
- time[index].vupdate_timestamp_in_ns = timestamp_in_ns;
- if (time[index].num_vsync_between_flips < MOD_STATS_NUM_VSYNCS)
- time[index].v_sync_time_in_us[time[index].num_vsync_between_flips] =
- timestamp_in_ns - time[index - 1].vupdate_timestamp_in_ns;
- time[index].flip_to_vsync_time_in_us =
- timestamp_in_ns - time[index - 1].flip_timestamp_in_ns;
+ time[index].v_sync_time_in_us[num_vsyncs] =
+ (timestamp_in_ns - prev_vsync_in_ns) / 1000;
+ }
+ time[index].vupdate_timestamp_in_ns = timestamp_in_ns;
time[index].num_vsync_between_flips++;
}
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 9fa3aaef3f33..b178176b72ac 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -92,7 +92,7 @@ enum amd_powergating_state {
#define AMD_CG_SUPPORT_GFX_3D_CGLS (1 << 21)
#define AMD_CG_SUPPORT_DRM_MGCG (1 << 22)
#define AMD_CG_SUPPORT_DF_MGCG (1 << 23)
-
+#define AMD_CG_SUPPORT_VCN_MGCG (1 << 24)
/* PG flags */
#define AMD_PG_SUPPORT_GFX_PG (1 << 0)
#define AMD_PG_SUPPORT_GFX_SMG (1 << 1)
@@ -108,6 +108,27 @@ enum amd_powergating_state {
#define AMD_PG_SUPPORT_GFX_QUICK_MG (1 << 11)
#define AMD_PG_SUPPORT_GFX_PIPELINE (1 << 12)
#define AMD_PG_SUPPORT_MMHUB (1 << 13)
+#define AMD_PG_SUPPORT_VCN (1 << 14)
+
+enum PP_FEATURE_MASK {
+ PP_SCLK_DPM_MASK = 0x1,
+ PP_MCLK_DPM_MASK = 0x2,
+ PP_PCIE_DPM_MASK = 0x4,
+ PP_SCLK_DEEP_SLEEP_MASK = 0x8,
+ PP_POWER_CONTAINMENT_MASK = 0x10,
+ PP_UVD_HANDSHAKE_MASK = 0x20,
+ PP_SMC_VOLTAGE_CONTROL_MASK = 0x40,
+ PP_VBI_TIME_SUPPORT_MASK = 0x80,
+ PP_ULV_MASK = 0x100,
+ PP_ENABLE_GFX_CG_THRU_SMU = 0x200,
+ PP_CLOCK_STRETCH_MASK = 0x400,
+ PP_OD_FUZZY_FAN_CONTROL_MASK = 0x800,
+ PP_SOCCLK_DPM_MASK = 0x1000,
+ PP_DCEFCLK_DPM_MASK = 0x2000,
+ PP_OVERDRIVE_MASK = 0x4000,
+ PP_GFXOFF_MASK = 0x8000,
+ PP_ACG_MASK = 0x10000,
+};
struct amd_ip_funcs {
/* Name of IP block */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
index f730d0629020..b6f74bf4af02 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
@@ -2095,6 +2095,18 @@
#define mmDC_GPIO_AUX_CTRL_2_BASE_IDX 2
#define mmDC_GPIO_RXEN 0x212f
#define mmDC_GPIO_RXEN_BASE_IDX 2
+#define mmDC_GPIO_AUX_CTRL_3 0x2130
+#define mmDC_GPIO_AUX_CTRL_3_BASE_IDX 2
+#define mmDC_GPIO_AUX_CTRL_4 0x2131
+#define mmDC_GPIO_AUX_CTRL_4_BASE_IDX 2
+#define mmDC_GPIO_AUX_CTRL_5 0x2132
+#define mmDC_GPIO_AUX_CTRL_5_BASE_IDX 2
+#define mmAUXI2C_PAD_ALL_PWR_OK 0x2133
+#define mmAUXI2C_PAD_ALL_PWR_OK_BASE_IDX 2
+#define mmDC_GPIO_PULLUPEN 0x2134
+#define mmDC_GPIO_PULLUPEN_BASE_IDX 2
+#define mmDC_GPIO_AUX_CTRL_6 0x2135
+#define mmDC_GPIO_AUX_CTRL_6_BASE_IDX 2
#define mmBPHYC_DAC_MACRO_CNTL 0x2136
#define mmBPHYC_DAC_MACRO_CNTL_BASE_IDX 2
#define mmDAC_MACRO_CNTL_RESERVED0 0x2136
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h
index 6d3162c42957..bcd190a3fcdd 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h
@@ -10971,6 +10971,158 @@
#define DC_GPIO_RXEN__DC_GPIO_BLON_RXEN_MASK 0x00100000L
#define DC_GPIO_RXEN__DC_GPIO_DIGON_RXEN_MASK 0x00200000L
#define DC_GPIO_RXEN__DC_GPIO_ENA_BL_RXEN_MASK 0x00400000L
+//DC_GPIO_AUX_CTRL_3
+#define DC_GPIO_AUX_CTRL_3__AUX1_NEN_RTERM__SHIFT 0x0
+#define DC_GPIO_AUX_CTRL_3__AUX2_NEN_RTERM__SHIFT 0x1
+#define DC_GPIO_AUX_CTRL_3__AUX3_NEN_RTERM__SHIFT 0x2
+#define DC_GPIO_AUX_CTRL_3__AUX4_NEN_RTERM__SHIFT 0x3
+#define DC_GPIO_AUX_CTRL_3__AUX5_NEN_RTERM__SHIFT 0x4
+#define DC_GPIO_AUX_CTRL_3__AUX6_NEN_RTERM__SHIFT 0x5
+#define DC_GPIO_AUX_CTRL_3__AUX1_DP_DN_SWAP__SHIFT 0x8
+#define DC_GPIO_AUX_CTRL_3__AUX2_DP_DN_SWAP__SHIFT 0x9
+#define DC_GPIO_AUX_CTRL_3__AUX3_DP_DN_SWAP__SHIFT 0xa
+#define DC_GPIO_AUX_CTRL_3__AUX4_DP_DN_SWAP__SHIFT 0xb
+#define DC_GPIO_AUX_CTRL_3__AUX5_DP_DN_SWAP__SHIFT 0xc
+#define DC_GPIO_AUX_CTRL_3__AUX6_DP_DN_SWAP__SHIFT 0xd
+#define DC_GPIO_AUX_CTRL_3__AUX1_HYS_TUNE__SHIFT 0x10
+#define DC_GPIO_AUX_CTRL_3__AUX2_HYS_TUNE__SHIFT 0x12
+#define DC_GPIO_AUX_CTRL_3__AUX3_HYS_TUNE__SHIFT 0x14
+#define DC_GPIO_AUX_CTRL_3__AUX4_HYS_TUNE__SHIFT 0x16
+#define DC_GPIO_AUX_CTRL_3__AUX5_HYS_TUNE__SHIFT 0x18
+#define DC_GPIO_AUX_CTRL_3__AUX6_HYS_TUNE__SHIFT 0x1a
+#define DC_GPIO_AUX_CTRL_3__AUX1_NEN_RTERM_MASK 0x00000001L
+#define DC_GPIO_AUX_CTRL_3__AUX2_NEN_RTERM_MASK 0x00000002L
+#define DC_GPIO_AUX_CTRL_3__AUX3_NEN_RTERM_MASK 0x00000004L
+#define DC_GPIO_AUX_CTRL_3__AUX4_NEN_RTERM_MASK 0x00000008L
+#define DC_GPIO_AUX_CTRL_3__AUX5_NEN_RTERM_MASK 0x00000010L
+#define DC_GPIO_AUX_CTRL_3__AUX6_NEN_RTERM_MASK 0x00000020L
+#define DC_GPIO_AUX_CTRL_3__AUX1_DP_DN_SWAP_MASK 0x00000100L
+#define DC_GPIO_AUX_CTRL_3__AUX2_DP_DN_SWAP_MASK 0x00000200L
+#define DC_GPIO_AUX_CTRL_3__AUX3_DP_DN_SWAP_MASK 0x00000400L
+#define DC_GPIO_AUX_CTRL_3__AUX4_DP_DN_SWAP_MASK 0x00000800L
+#define DC_GPIO_AUX_CTRL_3__AUX5_DP_DN_SWAP_MASK 0x00001000L
+#define DC_GPIO_AUX_CTRL_3__AUX6_DP_DN_SWAP_MASK 0x00002000L
+#define DC_GPIO_AUX_CTRL_3__AUX1_HYS_TUNE_MASK 0x00030000L
+#define DC_GPIO_AUX_CTRL_3__AUX2_HYS_TUNE_MASK 0x000C0000L
+#define DC_GPIO_AUX_CTRL_3__AUX3_HYS_TUNE_MASK 0x00300000L
+#define DC_GPIO_AUX_CTRL_3__AUX4_HYS_TUNE_MASK 0x00C00000L
+#define DC_GPIO_AUX_CTRL_3__AUX5_HYS_TUNE_MASK 0x03000000L
+#define DC_GPIO_AUX_CTRL_3__AUX6_HYS_TUNE_MASK 0x0C000000L
+//DC_GPIO_AUX_CTRL_4
+#define DC_GPIO_AUX_CTRL_4__AUX1_AUX_CTRL__SHIFT 0x0
+#define DC_GPIO_AUX_CTRL_4__AUX2_AUX_CTRL__SHIFT 0x4
+#define DC_GPIO_AUX_CTRL_4__AUX3_AUX_CTRL__SHIFT 0x8
+#define DC_GPIO_AUX_CTRL_4__AUX4_AUX_CTRL__SHIFT 0xc
+#define DC_GPIO_AUX_CTRL_4__AUX5_AUX_CTRL__SHIFT 0x10
+#define DC_GPIO_AUX_CTRL_4__AUX6_AUX_CTRL__SHIFT 0x14
+#define DC_GPIO_AUX_CTRL_4__AUX1_AUX_CTRL_MASK 0x0000000FL
+#define DC_GPIO_AUX_CTRL_4__AUX2_AUX_CTRL_MASK 0x000000F0L
+#define DC_GPIO_AUX_CTRL_4__AUX3_AUX_CTRL_MASK 0x00000F00L
+#define DC_GPIO_AUX_CTRL_4__AUX4_AUX_CTRL_MASK 0x0000F000L
+#define DC_GPIO_AUX_CTRL_4__AUX5_AUX_CTRL_MASK 0x000F0000L
+#define DC_GPIO_AUX_CTRL_4__AUX6_AUX_CTRL_MASK 0x00F00000L
+//DC_GPIO_AUX_CTRL_5
+#define DC_GPIO_AUX_CTRL_5__AUX1_VOD_TUNE__SHIFT 0x0
+#define DC_GPIO_AUX_CTRL_5__AUX2_VOD_TUNE__SHIFT 0x2
+#define DC_GPIO_AUX_CTRL_5__AUX3_VOD_TUNE__SHIFT 0x4
+#define DC_GPIO_AUX_CTRL_5__AUX4_VOD_TUNE__SHIFT 0x6
+#define DC_GPIO_AUX_CTRL_5__AUX5_VOD_TUNE__SHIFT 0x8
+#define DC_GPIO_AUX_CTRL_5__AUX6_VOD_TUNE__SHIFT 0xa
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD1_I2CMODE__SHIFT 0xc
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD2_I2CMODE__SHIFT 0xd
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD3_I2CMODE__SHIFT 0xe
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD4_I2CMODE__SHIFT 0xf
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD5_I2CMODE__SHIFT 0x10
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD6_I2CMODE__SHIFT 0x11
+#define DC_GPIO_AUX_CTRL_5__DDC1_I2C_VPH_1V2_EN__SHIFT 0x12
+#define DC_GPIO_AUX_CTRL_5__DDC2_I2C_VPH_1V2_EN__SHIFT 0x13
+#define DC_GPIO_AUX_CTRL_5__DDC3_I2C_VPH_1V2_EN__SHIFT 0x14
+#define DC_GPIO_AUX_CTRL_5__DDC4_I2C_VPH_1V2_EN__SHIFT 0x15
+#define DC_GPIO_AUX_CTRL_5__DDC5_I2C_VPH_1V2_EN__SHIFT 0x16
+#define DC_GPIO_AUX_CTRL_5__DDC6_I2C_VPH_1V2_EN__SHIFT 0x17
+#define DC_GPIO_AUX_CTRL_5__DDC1_PAD_I2C_CTRL__SHIFT 0x18
+#define DC_GPIO_AUX_CTRL_5__DDC2_PAD_I2C_CTRL__SHIFT 0x19
+#define DC_GPIO_AUX_CTRL_5__DDC3_PAD_I2C_CTRL__SHIFT 0x1a
+#define DC_GPIO_AUX_CTRL_5__DDC4_PAD_I2C_CTRL__SHIFT 0x1b
+#define DC_GPIO_AUX_CTRL_5__DDC5_PAD_I2C_CTRL__SHIFT 0x1c
+#define DC_GPIO_AUX_CTRL_5__DDC6_PAD_I2C_CTRL__SHIFT 0x1d
+#define DC_GPIO_AUX_CTRL_5__AUX1_VOD_TUNE_MASK 0x00000003L
+#define DC_GPIO_AUX_CTRL_5__AUX2_VOD_TUNE_MASK 0x0000000CL
+#define DC_GPIO_AUX_CTRL_5__AUX3_VOD_TUNE_MASK 0x00000030L
+#define DC_GPIO_AUX_CTRL_5__AUX4_VOD_TUNE_MASK 0x000000C0L
+#define DC_GPIO_AUX_CTRL_5__AUX5_VOD_TUNE_MASK 0x00000300L
+#define DC_GPIO_AUX_CTRL_5__AUX6_VOD_TUNE_MASK 0x00000C00L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD1_I2CMODE_MASK 0x00001000L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD2_I2CMODE_MASK 0x00002000L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD3_I2CMODE_MASK 0x00004000L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD4_I2CMODE_MASK 0x00008000L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD5_I2CMODE_MASK 0x00010000L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD6_I2CMODE_MASK 0x00020000L
+#define DC_GPIO_AUX_CTRL_5__DDC1_I2C_VPH_1V2_EN_MASK 0x00040000L
+#define DC_GPIO_AUX_CTRL_5__DDC2_I2C_VPH_1V2_EN_MASK 0x00080000L
+#define DC_GPIO_AUX_CTRL_5__DDC3_I2C_VPH_1V2_EN_MASK 0x00100000L
+#define DC_GPIO_AUX_CTRL_5__DDC4_I2C_VPH_1V2_EN_MASK 0x00200000L
+#define DC_GPIO_AUX_CTRL_5__DDC5_I2C_VPH_1V2_EN_MASK 0x00400000L
+#define DC_GPIO_AUX_CTRL_5__DDC6_I2C_VPH_1V2_EN_MASK 0x00800000L
+#define DC_GPIO_AUX_CTRL_5__DDC1_PAD_I2C_CTRL_MASK 0x01000000L
+#define DC_GPIO_AUX_CTRL_5__DDC2_PAD_I2C_CTRL_MASK 0x02000000L
+#define DC_GPIO_AUX_CTRL_5__DDC3_PAD_I2C_CTRL_MASK 0x04000000L
+#define DC_GPIO_AUX_CTRL_5__DDC4_PAD_I2C_CTRL_MASK 0x08000000L
+#define DC_GPIO_AUX_CTRL_5__DDC5_PAD_I2C_CTRL_MASK 0x10000000L
+#define DC_GPIO_AUX_CTRL_5__DDC6_PAD_I2C_CTRL_MASK 0x20000000L
+//AUXI2C_PAD_ALL_PWR_OK
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY1_ALL_PWR_OK__SHIFT 0x0
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY2_ALL_PWR_OK__SHIFT 0x1
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY3_ALL_PWR_OK__SHIFT 0x2
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY4_ALL_PWR_OK__SHIFT 0x3
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY5_ALL_PWR_OK__SHIFT 0x4
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY6_ALL_PWR_OK__SHIFT 0x5
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY1_ALL_PWR_OK_MASK 0x00000001L
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY2_ALL_PWR_OK_MASK 0x00000002L
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY3_ALL_PWR_OK_MASK 0x00000004L
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY4_ALL_PWR_OK_MASK 0x00000008L
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY5_ALL_PWR_OK_MASK 0x00000010L
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY6_ALL_PWR_OK_MASK 0x00000020L
+//DC_GPIO_PULLUPEN
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICA_PU_EN__SHIFT 0x0
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICB_PU_EN__SHIFT 0x1
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICC_PU_EN__SHIFT 0x2
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICD_PU_EN__SHIFT 0x3
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICE_PU_EN__SHIFT 0x4
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICF_PU_EN__SHIFT 0x5
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICG_PU_EN__SHIFT 0x6
+#define DC_GPIO_PULLUPEN__DC_GPIO_HSYNCA_PU_EN__SHIFT 0x8
+#define DC_GPIO_PULLUPEN__DC_GPIO_VSYNCA_PU_EN__SHIFT 0x9
+#define DC_GPIO_PULLUPEN__DC_GPIO_HPD1_PU_EN__SHIFT 0xe
+#define DC_GPIO_PULLUPEN__DC_GPIO_BLON_PU_EN__SHIFT 0x14
+#define DC_GPIO_PULLUPEN__DC_GPIO_DIGON_PU_EN__SHIFT 0x15
+#define DC_GPIO_PULLUPEN__DC_GPIO_ENA_BL_PU_EN__SHIFT 0x16
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICA_PU_EN_MASK 0x00000001L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICB_PU_EN_MASK 0x00000002L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICC_PU_EN_MASK 0x00000004L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICD_PU_EN_MASK 0x00000008L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICE_PU_EN_MASK 0x00000010L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICF_PU_EN_MASK 0x00000020L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICG_PU_EN_MASK 0x00000040L
+#define DC_GPIO_PULLUPEN__DC_GPIO_HSYNCA_PU_EN_MASK 0x00000100L
+#define DC_GPIO_PULLUPEN__DC_GPIO_VSYNCA_PU_EN_MASK 0x00000200L
+#define DC_GPIO_PULLUPEN__DC_GPIO_HPD1_PU_EN_MASK 0x00004000L
+#define DC_GPIO_PULLUPEN__DC_GPIO_BLON_PU_EN_MASK 0x00100000L
+#define DC_GPIO_PULLUPEN__DC_GPIO_DIGON_PU_EN_MASK 0x00200000L
+#define DC_GPIO_PULLUPEN__DC_GPIO_ENA_BL_PU_EN_MASK 0x00400000L
+//DC_GPIO_AUX_CTRL_6
+#define DC_GPIO_AUX_CTRL_6__AUX1_PAD_RXSEL__SHIFT 0x0
+#define DC_GPIO_AUX_CTRL_6__AUX2_PAD_RXSEL__SHIFT 0x2
+#define DC_GPIO_AUX_CTRL_6__AUX3_PAD_RXSEL__SHIFT 0x4
+#define DC_GPIO_AUX_CTRL_6__AUX4_PAD_RXSEL__SHIFT 0x6
+#define DC_GPIO_AUX_CTRL_6__AUX5_PAD_RXSEL__SHIFT 0x8
+#define DC_GPIO_AUX_CTRL_6__AUX6_PAD_RXSEL__SHIFT 0xa
+#define DC_GPIO_AUX_CTRL_6__AUX1_PAD_RXSEL_MASK 0x00000003L
+#define DC_GPIO_AUX_CTRL_6__AUX2_PAD_RXSEL_MASK 0x0000000CL
+#define DC_GPIO_AUX_CTRL_6__AUX3_PAD_RXSEL_MASK 0x00000030L
+#define DC_GPIO_AUX_CTRL_6__AUX4_PAD_RXSEL_MASK 0x000000C0L
+#define DC_GPIO_AUX_CTRL_6__AUX5_PAD_RXSEL_MASK 0x00000300L
+#define DC_GPIO_AUX_CTRL_6__AUX6_PAD_RXSEL_MASK 0x00000C00L
//BPHYC_DAC_MACRO_CNTL
#define BPHYC_DAC_MACRO_CNTL__BPHYC_DAC_WHITE_LEVEL__SHIFT 0x0
#define BPHYC_DAC_MACRO_CNTL__BPHYC_DAC_WHITE_FINE_CONTROL__SHIFT 0x8
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h
index 4ccf9681c45d..721c61171045 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h
@@ -3895,6 +3895,10 @@
#define mmCM0_CM_MEM_PWR_CTRL_BASE_IDX 2
#define mmCM0_CM_MEM_PWR_STATUS 0x0d33
#define mmCM0_CM_MEM_PWR_STATUS_BASE_IDX 2
+#define mmCM0_CM_TEST_DEBUG_INDEX 0x0d35
+#define mmCM0_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM0_CM_TEST_DEBUG_DATA 0x0d36
+#define mmCM0_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -4367,7 +4371,10 @@
#define mmCM1_CM_MEM_PWR_CTRL_BASE_IDX 2
#define mmCM1_CM_MEM_PWR_STATUS 0x0e4e
#define mmCM1_CM_MEM_PWR_STATUS_BASE_IDX 2
-
+#define mmCM1_CM_TEST_DEBUG_INDEX 0x0e50
+#define mmCM1_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM1_CM_TEST_DEBUG_DATA 0x0e51
+#define mmCM1_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp1_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
// base address: 0x399c
@@ -4839,7 +4846,10 @@
#define mmCM2_CM_MEM_PWR_CTRL_BASE_IDX 2
#define mmCM2_CM_MEM_PWR_STATUS 0x0f69
#define mmCM2_CM_MEM_PWR_STATUS_BASE_IDX 2
-
+#define mmCM2_CM_TEST_DEBUG_INDEX 0x0f6b
+#define mmCM2_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM2_CM_TEST_DEBUG_DATA 0x0f6c
+#define mmCM2_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp2_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
// base address: 0x3e08
@@ -5311,7 +5321,10 @@
#define mmCM3_CM_MEM_PWR_CTRL_BASE_IDX 2
#define mmCM3_CM_MEM_PWR_STATUS 0x1084
#define mmCM3_CM_MEM_PWR_STATUS_BASE_IDX 2
-
+#define mmCM3_CM_TEST_DEBUG_INDEX 0x1086
+#define mmCM3_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM3_CM_TEST_DEBUG_DATA 0x1087
+#define mmCM3_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp3_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
// base address: 0x4274
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h
index e2a2f114bd8e..e7c0cad41081 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h
@@ -14049,6 +14049,14 @@
#define CM0_CM_MEM_PWR_STATUS__RGAM_MEM_PWR_STATE__SHIFT 0x2
#define CM0_CM_MEM_PWR_STATUS__SHARED_MEM_PWR_STATE_MASK 0x00000003L
#define CM0_CM_MEM_PWR_STATUS__RGAM_MEM_PWR_STATE_MASK 0x0000000CL
+//CM0_CM_TEST_DEBUG_INDEX
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM0_CM_TEST_DEBUG_DATA
+#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
// addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h
new file mode 100644
index 000000000000..9e19e723081b
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_1_7_DEFAULT_HEADER
+#define _df_1_7_DEFAULT_HEADER
+
+#define mmFabricConfigAccessControl_DEFAULT 0x00000000
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h
new file mode 100644
index 000000000000..e6044e27a913
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_1_7_OFFSET_HEADER
+#define _df_1_7_OFFSET_HEADER
+
+#define mmFabricConfigAccessControl 0x0410
+#define mmFabricConfigAccessControl_BASE_IDX 0
+
+#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc
+#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0
+
+#define mmDF_CS_AON0_DramBaseAddress0 0x0044
+#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0
+
+#define mmDF_CS_AON0_CoherentSlaveModeCtrlA0 0x0214
+#define mmDF_CS_AON0_CoherentSlaveModeCtrlA0_BASE_IDX 0
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h
new file mode 100644
index 000000000000..a78c99480e2d
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_1_7_SH_MASK_HEADER
+#define _df_1_7_SH_MASK_HEADER
+
+/* FabricConfigAccessControl */
+#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0
+#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1
+#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10
+#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L
+#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L
+#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L
+
+/* DF_PIE_AON0_DfGlobalClkGater */
+#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0
+#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL
+
+/* DF_CS_AON0_DramBaseAddress0 */
+#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0
+#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1
+#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4
+#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8
+#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc
+#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L
+#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L
+#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L
+#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L
+#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L
+
+//DF_CS_AON0_CoherentSlaveModeCtrlA0
+#define DF_CS_AON0_CoherentSlaveModeCtrlA0__ForceParWrRMW__SHIFT 0x3
+#define DF_CS_AON0_CoherentSlaveModeCtrlA0__ForceParWrRMW_MASK 0x00000008L
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_default.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_default.h
new file mode 100644
index 000000000000..e58c207ac980
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_default.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_3_6_DEFAULT_HEADER
+#define _df_3_6_DEFAULT_HEADER
+
+#define mmFabricConfigAccessControl_DEFAULT 0x00000000
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
new file mode 100644
index 000000000000..a9575db8d7aa
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_3_6_OFFSET_HEADER
+#define _df_3_6_OFFSET_HEADER
+
+#define mmFabricConfigAccessControl 0x0410
+#define mmFabricConfigAccessControl_BASE_IDX 0
+
+#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc
+#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0
+
+#define mmDF_CS_UMC_AON0_DramBaseAddress0 0x0044
+#define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX 0
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
new file mode 100644
index 000000000000..88f7c69df6b9
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_3_6_SH_MASK_HEADER
+#define _df_3_6_SH_MASK_HEADER
+
+/* FabricConfigAccessControl */
+#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0
+#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1
+#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10
+#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L
+#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L
+#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L
+
+/* DF_PIE_AON0_DfGlobalClkGater */
+#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0
+#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL
+
+/* DF_CS_AON0_DramBaseAddress0 */
+#define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0
+#define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1
+#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4
+#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8
+#define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc
+#define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L
+#define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L
+#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L
+#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L
+#define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
index f696bbb643ef..7931502fa54f 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -632,6 +632,13 @@ typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2
ULONG ulReserved;
}COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2;
+typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3
+{
+ COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 ulClock;
+ USHORT usMclk_fcw_frac; //fractional divider of fcw = usSclk_fcw_frac/65536
+ USHORT usMclk_fcw_int; //integer divider of fcwc
+}COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3;
+
//Input parameter of DynamicMemorySettingsTable
//when ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag = COMPUTE_MEMORY_PLL_PARAM
typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index de177ce8ca80..c6c1666ac120 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -1219,6 +1219,41 @@ struct atom_gfx_info_v2_3 {
uint32_t rm21_sram_vmin_value;
};
+struct atom_gfx_info_v2_4 {
+ struct atom_common_table_header table_header;
+ uint8_t gfxip_min_ver;
+ uint8_t gfxip_max_ver;
+ uint8_t gc_num_se;
+ uint8_t max_tile_pipes;
+ uint8_t gc_num_cu_per_sh;
+ uint8_t gc_num_sh_per_se;
+ uint8_t gc_num_rb_per_se;
+ uint8_t gc_num_tccs;
+ uint32_t regaddr_cp_dma_src_addr;
+ uint32_t regaddr_cp_dma_src_addr_hi;
+ uint32_t regaddr_cp_dma_dst_addr;
+ uint32_t regaddr_cp_dma_dst_addr_hi;
+ uint32_t regaddr_cp_dma_command;
+ uint32_t regaddr_cp_status;
+ uint32_t regaddr_rlc_gpu_clock_32;
+ uint32_t rlc_gpu_timer_refclk;
+ uint8_t active_cu_per_sh;
+ uint8_t active_rb_per_se;
+ uint16_t gcgoldenoffset;
+ uint16_t gc_num_gprs;
+ uint16_t gc_gsprim_buff_depth;
+ uint16_t gc_parameter_cache_depth;
+ uint16_t gc_wave_size;
+ uint16_t gc_max_waves_per_simd;
+ uint16_t gc_lds_size;
+ uint8_t gc_num_max_gs_thds;
+ uint8_t gc_gs_table_depth;
+ uint8_t gc_double_offchip_lds_buffer;
+ uint8_t gc_max_scratch_slots_per_cu;
+ uint32_t sram_rm_fuses_val;
+ uint32_t sram_custom_rm_fuses_val;
+};
+
/*
***************************************************************************
Data Table smu_info structure
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index f2814ae7ecdd..a69deb3a2ac0 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -42,20 +42,6 @@ enum cgs_ind_reg {
CGS_IND_REG__AUDIO_ENDPT
};
-/**
- * enum cgs_engine - Engines that can be statically power-gated
- */
-enum cgs_engine {
- CGS_ENGINE__UVD,
- CGS_ENGINE__VCE,
- CGS_ENGINE__VP8,
- CGS_ENGINE__ACP_DMA,
- CGS_ENGINE__ACP_DSP0,
- CGS_ENGINE__ACP_DSP1,
- CGS_ENGINE__ISP,
- /* ... */
-};
-
/*
* enum cgs_ucode_id - Firmware types for different IPs
*/
@@ -76,17 +62,6 @@ enum cgs_ucode_id {
CGS_UCODE_ID_MAXIMUM,
};
-/*
- * enum cgs_resource_type - GPU resource type
- */
-enum cgs_resource_type {
- CGS_RESOURCE_TYPE_MMIO = 0,
- CGS_RESOURCE_TYPE_FB,
- CGS_RESOURCE_TYPE_IO,
- CGS_RESOURCE_TYPE_DOORBELL,
- CGS_RESOURCE_TYPE_ROM,
-};
-
/**
* struct cgs_firmware_info - Firmware information
*/
@@ -104,17 +79,6 @@ struct cgs_firmware_info {
bool is_kicker;
};
-struct cgs_mode_info {
- uint32_t refresh_rate;
- uint32_t vblank_time_us;
-};
-
-struct cgs_display_info {
- uint32_t display_count;
- uint32_t active_display_mask;
- struct cgs_mode_info *mode_info;
-};
-
typedef unsigned long cgs_handle_t;
/**
@@ -170,119 +134,18 @@ typedef void (*cgs_write_ind_register_t)(struct cgs_device *cgs_device, enum cgs
#define CGS_WREG32_FIELD_IND(device, space, reg, field, val) \
cgs_write_ind_register(device, space, ix##reg, (cgs_read_ind_register(device, space, ix##reg) & ~CGS_REG_FIELD_MASK(reg, field)) | (val) << CGS_REG_FIELD_SHIFT(reg, field))
-/**
- * cgs_get_pci_resource() - provide access to a device resource (PCI BAR)
- * @cgs_device: opaque device handle
- * @resource_type: Type of Resource (MMIO, IO, ROM, FB, DOORBELL)
- * @size: size of the region
- * @offset: offset from the start of the region
- * @resource_base: base address (not including offset) returned
- *
- * Return: 0 on success, -errno otherwise
- */
-typedef int (*cgs_get_pci_resource_t)(struct cgs_device *cgs_device,
- enum cgs_resource_type resource_type,
- uint64_t size,
- uint64_t offset,
- uint64_t *resource_base);
-
-/**
- * cgs_atom_get_data_table() - Get a pointer to an ATOM BIOS data table
- * @cgs_device: opaque device handle
- * @table: data table index
- * @size: size of the table (output, may be NULL)
- * @frev: table format revision (output, may be NULL)
- * @crev: table content revision (output, may be NULL)
- *
- * Return: Pointer to start of the table, or NULL on failure
- */
-typedef const void *(*cgs_atom_get_data_table_t)(
- struct cgs_device *cgs_device, unsigned table,
- uint16_t *size, uint8_t *frev, uint8_t *crev);
-
-/**
- * cgs_atom_get_cmd_table_revs() - Get ATOM BIOS command table revisions
- * @cgs_device: opaque device handle
- * @table: data table index
- * @frev: table format revision (output, may be NULL)
- * @crev: table content revision (output, may be NULL)
- *
- * Return: 0 on success, -errno otherwise
- */
-typedef int (*cgs_atom_get_cmd_table_revs_t)(struct cgs_device *cgs_device, unsigned table,
- uint8_t *frev, uint8_t *crev);
-
-/**
- * cgs_atom_exec_cmd_table() - Execute an ATOM BIOS command table
- * @cgs_device: opaque device handle
- * @table: command table index
- * @args: arguments
- *
- * Return: 0 on success, -errno otherwise
- */
-typedef int (*cgs_atom_exec_cmd_table_t)(struct cgs_device *cgs_device,
- unsigned table, void *args);
-
-/**
- * cgs_get_firmware_info - Get the firmware information from core driver
- * @cgs_device: opaque device handle
- * @type: the firmware type
- * @info: returend firmware information
- *
- * Return: 0 on success, -errno otherwise
- */
typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device,
enum cgs_ucode_id type,
struct cgs_firmware_info *info);
-typedef int (*cgs_rel_firmware)(struct cgs_device *cgs_device,
- enum cgs_ucode_id type);
-
-typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device,
- enum amd_ip_block_type block_type,
- enum amd_powergating_state state);
-
-typedef int(*cgs_set_clockgating_state)(struct cgs_device *cgs_device,
- enum amd_ip_block_type block_type,
- enum amd_clockgating_state state);
-
-typedef int(*cgs_get_active_displays_info)(
- struct cgs_device *cgs_device,
- struct cgs_display_info *info);
-
-typedef int (*cgs_notify_dpm_enabled)(struct cgs_device *cgs_device, bool enabled);
-
-typedef int (*cgs_is_virtualization_enabled_t)(void *cgs_device);
-
-typedef int (*cgs_enter_safe_mode)(struct cgs_device *cgs_device, bool en);
-
-typedef void (*cgs_lock_grbm_idx)(struct cgs_device *cgs_device, bool lock);
-
struct cgs_ops {
/* MMIO access */
cgs_read_register_t read_register;
cgs_write_register_t write_register;
cgs_read_ind_register_t read_ind_register;
cgs_write_ind_register_t write_ind_register;
- /* PCI resources */
- cgs_get_pci_resource_t get_pci_resource;
- /* ATOM BIOS */
- cgs_atom_get_data_table_t atom_get_data_table;
- cgs_atom_get_cmd_table_revs_t atom_get_cmd_table_revs;
- cgs_atom_exec_cmd_table_t atom_exec_cmd_table;
/* Firmware Info */
cgs_get_firmware_info get_firmware_info;
- cgs_rel_firmware rel_firmware;
- /* cg pg interface*/
- cgs_set_powergating_state set_powergating_state;
- cgs_set_clockgating_state set_clockgating_state;
- /* display manager */
- cgs_get_active_displays_info get_active_displays_info;
- /* notify dpm enabled */
- cgs_notify_dpm_enabled notify_dpm_enabled;
- cgs_is_virtualization_enabled_t is_virtualization_enabled;
- cgs_enter_safe_mode enter_safe_mode;
- cgs_lock_grbm_idx lock_grbm_idx;
};
struct cgs_os_ops; /* To be define in OS-specific CGS header */
@@ -309,40 +172,7 @@ struct cgs_device
#define cgs_write_ind_register(dev,space,index,value) \
CGS_CALL(write_ind_register,dev,space,index,value)
-#define cgs_atom_get_data_table(dev,table,size,frev,crev) \
- CGS_CALL(atom_get_data_table,dev,table,size,frev,crev)
-#define cgs_atom_get_cmd_table_revs(dev,table,frev,crev) \
- CGS_CALL(atom_get_cmd_table_revs,dev,table,frev,crev)
-#define cgs_atom_exec_cmd_table(dev,table,args) \
- CGS_CALL(atom_exec_cmd_table,dev,table,args)
-
#define cgs_get_firmware_info(dev, type, info) \
CGS_CALL(get_firmware_info, dev, type, info)
-#define cgs_rel_firmware(dev, type) \
- CGS_CALL(rel_firmware, dev, type)
-#define cgs_set_powergating_state(dev, block_type, state) \
- CGS_CALL(set_powergating_state, dev, block_type, state)
-#define cgs_set_clockgating_state(dev, block_type, state) \
- CGS_CALL(set_clockgating_state, dev, block_type, state)
-#define cgs_notify_dpm_enabled(dev, enabled) \
- CGS_CALL(notify_dpm_enabled, dev, enabled)
-
-#define cgs_get_active_displays_info(dev, info) \
- CGS_CALL(get_active_displays_info, dev, info)
-
-#define cgs_get_pci_resource(cgs_device, resource_type, size, offset, \
- resource_base) \
- CGS_CALL(get_pci_resource, cgs_device, resource_type, size, offset, \
- resource_base)
-
-#define cgs_is_virtualization_enabled(cgs_device) \
- CGS_CALL(is_virtualization_enabled, cgs_device)
-
-#define cgs_enter_safe_mode(cgs_device, en) \
- CGS_CALL(enter_safe_mode, cgs_device, en)
-
-#define cgs_lock_grbm_idx(cgs_device, lock) \
- CGS_CALL(lock_grbm_idx, cgs_device, lock)
-
#endif /* _CGS_COMMON_H */
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 237289a72bb7..5733fbee07f7 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -100,6 +100,21 @@ struct kgd2kfd_shared_resources {
/* Bit n == 1 means Queue n is available for KFD */
DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
+ /* Doorbell assignments (SOC15 and later chips only). Only
+ * specific doorbells are routed to each SDMA engine. Others
+ * are routed to IH and VCN. They are not usable by the CP.
+ *
+ * Any doorbell number D that satisfies the following condition
+ * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val
+ *
+ * KFD currently uses 1024 (= 0x3ff) doorbells per process. If
+ * doorbells 0x0f0-0x0f7 and 0x2f-0x2f7 are reserved, that means
+ * mask would be set to 0x1f8 and val set to 0x0f0.
+ */
+ unsigned int sdma_doorbell[2][2];
+ unsigned int reserved_doorbell_mask;
+ unsigned int reserved_doorbell_val;
+
/* Base address of doorbell aperture. */
phys_addr_t doorbell_physical_address;
@@ -173,8 +188,6 @@ struct tile_config {
* @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp
* scheduling mode. Only used for no cp scheduling mode.
*
- * @init_pipeline: Initialized the compute pipelines.
- *
* @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp
* sceduling mode.
*
@@ -274,9 +287,6 @@ struct kfd2kgd_calls {
int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
- int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
-
int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -382,6 +392,10 @@ struct kfd2kgd_calls {
*
* @resume: Notifies amdkfd about a resume action done to a kgd device
*
+ * @quiesce_mm: Quiesce all user queue access to specified MM address space
+ *
+ * @resume_mm: Resume user queue access to specified MM address space
+ *
* @schedule_evict_and_restore_process: Schedules work queue that will prepare
* for safe eviction of KFD BOs that belong to the specified process.
*
@@ -399,6 +413,8 @@ struct kgd2kfd_calls {
void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
void (*suspend)(struct kfd_dev *kfd);
int (*resume)(struct kfd_dev *kfd);
+ int (*quiesce_mm)(struct mm_struct *mm);
+ int (*resume_mm)(struct mm_struct *mm);
int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
struct dma_fence *fence);
};
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 5c840c022b52..06f08f34a110 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -94,6 +94,7 @@ enum pp_clock_type {
PP_PCIE,
OD_SCLK,
OD_MCLK,
+ OD_RANGE,
};
enum amd_pp_sensors {
@@ -149,13 +150,6 @@ struct pp_states_info {
uint32_t states[16];
};
-struct pp_gpu_power {
- uint32_t vddc_power;
- uint32_t vddci_power;
- uint32_t max_gpu_power;
- uint32_t average_gpu_power;
-};
-
#define PP_GROUP_MASK 0xF0000000
#define PP_GROUP_SHIFT 28
@@ -246,11 +240,6 @@ struct amd_pm_funcs {
int (*load_firmware)(void *handle);
int (*wait_for_fw_loading_complete)(void *handle);
int (*set_clockgating_by_smu)(void *handle, uint32_t msg_id);
- int (*notify_smu_memory_info)(void *handle, uint32_t virtual_addr_low,
- uint32_t virtual_addr_hi,
- uint32_t mc_addr_low,
- uint32_t mc_addr_hi,
- uint32_t size);
int (*set_power_limit)(void *handle, uint32_t n);
int (*get_power_limit)(void *handle, uint32_t *limit, bool default_limit);
/* export to DC */
diff --git a/drivers/gpu/drm/amd/include/soc15_ih_clientid.h b/drivers/gpu/drm/amd/include/soc15_ih_clientid.h
index a12d4f27cfa4..12e196c15bbe 100644
--- a/drivers/gpu/drm/amd/include/soc15_ih_clientid.h
+++ b/drivers/gpu/drm/amd/include/soc15_ih_clientid.h
@@ -43,6 +43,7 @@ enum soc15_ih_clientid {
SOC15_IH_CLIENTID_SE2SH = 0x0c,
SOC15_IH_CLIENTID_SE3SH = 0x0d,
SOC15_IH_CLIENTID_SYSHUB = 0x0e,
+ SOC15_IH_CLIENTID_UVD1 = 0x0e,
SOC15_IH_CLIENTID_THM = 0x0f,
SOC15_IH_CLIENTID_UVD = 0x10,
SOC15_IH_CLIENTID_VCE0 = 0x11,
diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h
index 2fb25abaf7c8..ceaf4932258d 100644
--- a/drivers/gpu/drm/amd/include/v9_structs.h
+++ b/drivers/gpu/drm/amd/include/v9_structs.h
@@ -29,10 +29,10 @@ struct v9_sdma_mqd {
uint32_t sdmax_rlcx_rb_base;
uint32_t sdmax_rlcx_rb_base_hi;
uint32_t sdmax_rlcx_rb_rptr;
+ uint32_t sdmax_rlcx_rb_rptr_hi;
uint32_t sdmax_rlcx_rb_wptr;
+ uint32_t sdmax_rlcx_rb_wptr_hi;
uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
- uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
- uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
uint32_t sdmax_rlcx_rb_rptr_addr_hi;
uint32_t sdmax_rlcx_rb_rptr_addr_lo;
uint32_t sdmax_rlcx_ib_cntl;
@@ -44,29 +44,29 @@ struct v9_sdma_mqd {
uint32_t sdmax_rlcx_skip_cntl;
uint32_t sdmax_rlcx_context_status;
uint32_t sdmax_rlcx_doorbell;
- uint32_t sdmax_rlcx_virtual_addr;
- uint32_t sdmax_rlcx_ape1_cntl;
+ uint32_t sdmax_rlcx_status;
uint32_t sdmax_rlcx_doorbell_log;
- uint32_t reserved_22;
- uint32_t reserved_23;
- uint32_t reserved_24;
- uint32_t reserved_25;
- uint32_t reserved_26;
- uint32_t reserved_27;
- uint32_t reserved_28;
- uint32_t reserved_29;
- uint32_t reserved_30;
- uint32_t reserved_31;
- uint32_t reserved_32;
- uint32_t reserved_33;
- uint32_t reserved_34;
- uint32_t reserved_35;
- uint32_t reserved_36;
- uint32_t reserved_37;
- uint32_t reserved_38;
- uint32_t reserved_39;
- uint32_t reserved_40;
- uint32_t reserved_41;
+ uint32_t sdmax_rlcx_watermark;
+ uint32_t sdmax_rlcx_doorbell_offset;
+ uint32_t sdmax_rlcx_csa_addr_lo;
+ uint32_t sdmax_rlcx_csa_addr_hi;
+ uint32_t sdmax_rlcx_ib_sub_remain;
+ uint32_t sdmax_rlcx_preempt;
+ uint32_t sdmax_rlcx_dummy_reg;
+ uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
+ uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
+ uint32_t sdmax_rlcx_rb_aql_cntl;
+ uint32_t sdmax_rlcx_minor_ptr_update;
+ uint32_t sdmax_rlcx_midcmd_data0;
+ uint32_t sdmax_rlcx_midcmd_data1;
+ uint32_t sdmax_rlcx_midcmd_data2;
+ uint32_t sdmax_rlcx_midcmd_data3;
+ uint32_t sdmax_rlcx_midcmd_data4;
+ uint32_t sdmax_rlcx_midcmd_data5;
+ uint32_t sdmax_rlcx_midcmd_data6;
+ uint32_t sdmax_rlcx_midcmd_data7;
+ uint32_t sdmax_rlcx_midcmd_data8;
+ uint32_t sdmax_rlcx_midcmd_cntl;
uint32_t reserved_42;
uint32_t reserved_43;
uint32_t reserved_44;
diff --git a/drivers/gpu/drm/amd/include/vega20_ip_offset.h b/drivers/gpu/drm/amd/include/vega20_ip_offset.h
new file mode 100644
index 000000000000..2a2a9cc8bedb
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/vega20_ip_offset.h
@@ -0,0 +1,1051 @@
+/*
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _vega20_ip_offset_HEADER
+#define _vega20_ip_offset_HEADER
+
+#define MAX_INSTANCE 6
+#define MAX_SEGMENT 6
+
+
+struct IP_BASE_INSTANCE
+{
+ unsigned int segment[MAX_SEGMENT];
+};
+
+struct IP_BASE
+{
+ struct IP_BASE_INSTANCE instance[MAX_INSTANCE];
+};
+
+
+static const struct IP_BASE ATHUB_BASE ={ { { { 0x00000C20, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE CLK_BASE ={ { { { 0x00016C00, 0x00016E00, 0x00017000, 0x00017200, 0x0001B000, 0x0001B200 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE DCE_BASE ={ { { { 0x00000012, 0x000000C0, 0x000034C0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE DF_BASE ={ { { { 0x00007000, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE FUSE_BASE ={ { { { 0x00017400, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE GC_BASE ={ { { { 0x00002000, 0x0000A000, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE HDP_BASE ={ { { { 0x00000F20, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE MMHUB_BASE ={ { { { 0x0001A000, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE MP0_BASE ={ { { { 0x00016000, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE MP1_BASE ={ { { { 0x00016000, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE NBIO_BASE ={ { { { 0x00000000, 0x00000014, 0x00000D20, 0x00010400, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE OSSSYS_BASE ={ { { { 0x000010A0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE SDMA0_BASE ={ { { { 0x00001260, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE SDMA1_BASE ={ { { { 0x00001860, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE SMUIO_BASE ={ { { { 0x00016800, 0x00016A00, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE THM_BASE ={ { { { 0x00016600, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE UMC_BASE ={ { { { 0x00014000, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE UVD_BASE ={ { { { 0x00007800, 0x00007E00, 0, 0, 0, 0 } },
+ { { 0, 0x00009000, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+/* Adjust VCE_BASE to make vce_4_1 use vce_4_0 offset header files*/
+static const struct IP_BASE VCE_BASE ={ { { { 0x00007E00/* 0x00008800 */, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE XDMA_BASE ={ { { { 0x00003400, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE RSMU_BASE ={ { { { 0x00012000, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0 } } } };
+
+
+#define ATHUB_BASE__INST0_SEG0 0x00000C20
+#define ATHUB_BASE__INST0_SEG1 0
+#define ATHUB_BASE__INST0_SEG2 0
+#define ATHUB_BASE__INST0_SEG3 0
+#define ATHUB_BASE__INST0_SEG4 0
+#define ATHUB_BASE__INST0_SEG5 0
+
+#define ATHUB_BASE__INST1_SEG0 0
+#define ATHUB_BASE__INST1_SEG1 0
+#define ATHUB_BASE__INST1_SEG2 0
+#define ATHUB_BASE__INST1_SEG3 0
+#define ATHUB_BASE__INST1_SEG4 0
+#define ATHUB_BASE__INST1_SEG5 0
+
+#define ATHUB_BASE__INST2_SEG0 0
+#define ATHUB_BASE__INST2_SEG1 0
+#define ATHUB_BASE__INST2_SEG2 0
+#define ATHUB_BASE__INST2_SEG3 0
+#define ATHUB_BASE__INST2_SEG4 0
+#define ATHUB_BASE__INST2_SEG5 0
+
+#define ATHUB_BASE__INST3_SEG0 0
+#define ATHUB_BASE__INST3_SEG1 0
+#define ATHUB_BASE__INST3_SEG2 0
+#define ATHUB_BASE__INST3_SEG3 0
+#define ATHUB_BASE__INST3_SEG4 0
+#define ATHUB_BASE__INST3_SEG5 0
+
+#define ATHUB_BASE__INST4_SEG0 0
+#define ATHUB_BASE__INST4_SEG1 0
+#define ATHUB_BASE__INST4_SEG2 0
+#define ATHUB_BASE__INST4_SEG3 0
+#define ATHUB_BASE__INST4_SEG4 0
+#define ATHUB_BASE__INST4_SEG5 0
+
+#define ATHUB_BASE__INST5_SEG0 0
+#define ATHUB_BASE__INST5_SEG1 0
+#define ATHUB_BASE__INST5_SEG2 0
+#define ATHUB_BASE__INST5_SEG3 0
+#define ATHUB_BASE__INST5_SEG4 0
+#define ATHUB_BASE__INST5_SEG5 0
+
+#define CLK_BASE__INST0_SEG0 0x00016C00
+#define CLK_BASE__INST0_SEG1 0x00016E00
+#define CLK_BASE__INST0_SEG2 0x00017000
+#define CLK_BASE__INST0_SEG3 0x00017200
+#define CLK_BASE__INST0_SEG4 0x0001B000
+#define CLK_BASE__INST0_SEG5 0x0001B200
+
+#define CLK_BASE__INST1_SEG0 0
+#define CLK_BASE__INST1_SEG1 0
+#define CLK_BASE__INST1_SEG2 0
+#define CLK_BASE__INST1_SEG3 0
+#define CLK_BASE__INST1_SEG4 0
+#define CLK_BASE__INST1_SEG5 0
+
+#define CLK_BASE__INST2_SEG0 0
+#define CLK_BASE__INST2_SEG1 0
+#define CLK_BASE__INST2_SEG2 0
+#define CLK_BASE__INST2_SEG3 0
+#define CLK_BASE__INST2_SEG4 0
+#define CLK_BASE__INST2_SEG5 0
+
+#define CLK_BASE__INST3_SEG0 0
+#define CLK_BASE__INST3_SEG1 0
+#define CLK_BASE__INST3_SEG2 0
+#define CLK_BASE__INST3_SEG3 0
+#define CLK_BASE__INST3_SEG4 0
+#define CLK_BASE__INST3_SEG5 0
+
+#define CLK_BASE__INST4_SEG0 0
+#define CLK_BASE__INST4_SEG1 0
+#define CLK_BASE__INST4_SEG2 0
+#define CLK_BASE__INST4_SEG3 0
+#define CLK_BASE__INST4_SEG4 0
+#define CLK_BASE__INST4_SEG5 0
+
+#define CLK_BASE__INST5_SEG0 0
+#define CLK_BASE__INST5_SEG1 0
+#define CLK_BASE__INST5_SEG2 0
+#define CLK_BASE__INST5_SEG3 0
+#define CLK_BASE__INST5_SEG4 0
+#define CLK_BASE__INST5_SEG5 0
+
+#define DCE_BASE__INST0_SEG0 0x00000012
+#define DCE_BASE__INST0_SEG1 0x000000C0
+#define DCE_BASE__INST0_SEG2 0x000034C0
+#define DCE_BASE__INST0_SEG3 0
+#define DCE_BASE__INST0_SEG4 0
+#define DCE_BASE__INST0_SEG5 0
+
+#define DCE_BASE__INST1_SEG0 0
+#define DCE_BASE__INST1_SEG1 0
+#define DCE_BASE__INST1_SEG2 0
+#define DCE_BASE__INST1_SEG3 0
+#define DCE_BASE__INST1_SEG4 0
+#define DCE_BASE__INST1_SEG5 0
+
+#define DCE_BASE__INST2_SEG0 0
+#define DCE_BASE__INST2_SEG1 0
+#define DCE_BASE__INST2_SEG2 0
+#define DCE_BASE__INST2_SEG3 0
+#define DCE_BASE__INST2_SEG4 0
+#define DCE_BASE__INST2_SEG5 0
+
+#define DCE_BASE__INST3_SEG0 0
+#define DCE_BASE__INST3_SEG1 0
+#define DCE_BASE__INST3_SEG2 0
+#define DCE_BASE__INST3_SEG3 0
+#define DCE_BASE__INST3_SEG4 0
+#define DCE_BASE__INST3_SEG5 0
+
+#define DCE_BASE__INST4_SEG0 0
+#define DCE_BASE__INST4_SEG1 0
+#define DCE_BASE__INST4_SEG2 0
+#define DCE_BASE__INST4_SEG3 0
+#define DCE_BASE__INST4_SEG4 0
+#define DCE_BASE__INST4_SEG5 0
+
+#define DCE_BASE__INST5_SEG0 0
+#define DCE_BASE__INST5_SEG1 0
+#define DCE_BASE__INST5_SEG2 0
+#define DCE_BASE__INST5_SEG3 0
+#define DCE_BASE__INST5_SEG4 0
+#define DCE_BASE__INST5_SEG5 0
+
+#define DF_BASE__INST0_SEG0 0x00007000
+#define DF_BASE__INST0_SEG1 0
+#define DF_BASE__INST0_SEG2 0
+#define DF_BASE__INST0_SEG3 0
+#define DF_BASE__INST0_SEG4 0
+#define DF_BASE__INST0_SEG5 0
+
+#define DF_BASE__INST1_SEG0 0
+#define DF_BASE__INST1_SEG1 0
+#define DF_BASE__INST1_SEG2 0
+#define DF_BASE__INST1_SEG3 0
+#define DF_BASE__INST1_SEG4 0
+#define DF_BASE__INST1_SEG5 0
+
+#define DF_BASE__INST2_SEG0 0
+#define DF_BASE__INST2_SEG1 0
+#define DF_BASE__INST2_SEG2 0
+#define DF_BASE__INST2_SEG3 0
+#define DF_BASE__INST2_SEG4 0
+#define DF_BASE__INST2_SEG5 0
+
+#define DF_BASE__INST3_SEG0 0
+#define DF_BASE__INST3_SEG1 0
+#define DF_BASE__INST3_SEG2 0
+#define DF_BASE__INST3_SEG3 0
+#define DF_BASE__INST3_SEG4 0
+#define DF_BASE__INST3_SEG5 0
+
+#define DF_BASE__INST4_SEG0 0
+#define DF_BASE__INST4_SEG1 0
+#define DF_BASE__INST4_SEG2 0
+#define DF_BASE__INST4_SEG3 0
+#define DF_BASE__INST4_SEG4 0
+#define DF_BASE__INST4_SEG5 0
+
+#define DF_BASE__INST5_SEG0 0
+#define DF_BASE__INST5_SEG1 0
+#define DF_BASE__INST5_SEG2 0
+#define DF_BASE__INST5_SEG3 0
+#define DF_BASE__INST5_SEG4 0
+#define DF_BASE__INST5_SEG5 0
+
+#define FUSE_BASE__INST0_SEG0 0x00017400
+#define FUSE_BASE__INST0_SEG1 0
+#define FUSE_BASE__INST0_SEG2 0
+#define FUSE_BASE__INST0_SEG3 0
+#define FUSE_BASE__INST0_SEG4 0
+#define FUSE_BASE__INST0_SEG5 0
+
+#define FUSE_BASE__INST1_SEG0 0
+#define FUSE_BASE__INST1_SEG1 0
+#define FUSE_BASE__INST1_SEG2 0
+#define FUSE_BASE__INST1_SEG3 0
+#define FUSE_BASE__INST1_SEG4 0
+#define FUSE_BASE__INST1_SEG5 0
+
+#define FUSE_BASE__INST2_SEG0 0
+#define FUSE_BASE__INST2_SEG1 0
+#define FUSE_BASE__INST2_SEG2 0
+#define FUSE_BASE__INST2_SEG3 0
+#define FUSE_BASE__INST2_SEG4 0
+#define FUSE_BASE__INST2_SEG5 0
+
+#define FUSE_BASE__INST3_SEG0 0
+#define FUSE_BASE__INST3_SEG1 0
+#define FUSE_BASE__INST3_SEG2 0
+#define FUSE_BASE__INST3_SEG3 0
+#define FUSE_BASE__INST3_SEG4 0
+#define FUSE_BASE__INST3_SEG5 0
+
+#define FUSE_BASE__INST4_SEG0 0
+#define FUSE_BASE__INST4_SEG1 0
+#define FUSE_BASE__INST4_SEG2 0
+#define FUSE_BASE__INST4_SEG3 0
+#define FUSE_BASE__INST4_SEG4 0
+#define FUSE_BASE__INST4_SEG5 0
+
+#define FUSE_BASE__INST5_SEG0 0
+#define FUSE_BASE__INST5_SEG1 0
+#define FUSE_BASE__INST5_SEG2 0
+#define FUSE_BASE__INST5_SEG3 0
+#define FUSE_BASE__INST5_SEG4 0
+#define FUSE_BASE__INST5_SEG5 0
+
+#define GC_BASE__INST0_SEG0 0x00002000
+#define GC_BASE__INST0_SEG1 0x0000A000
+#define GC_BASE__INST0_SEG2 0
+#define GC_BASE__INST0_SEG3 0
+#define GC_BASE__INST0_SEG4 0
+#define GC_BASE__INST0_SEG5 0
+
+#define GC_BASE__INST1_SEG0 0
+#define GC_BASE__INST1_SEG1 0
+#define GC_BASE__INST1_SEG2 0
+#define GC_BASE__INST1_SEG3 0
+#define GC_BASE__INST1_SEG4 0
+#define GC_BASE__INST1_SEG5 0
+
+#define GC_BASE__INST2_SEG0 0
+#define GC_BASE__INST2_SEG1 0
+#define GC_BASE__INST2_SEG2 0
+#define GC_BASE__INST2_SEG3 0
+#define GC_BASE__INST2_SEG4 0
+#define GC_BASE__INST2_SEG5 0
+
+#define GC_BASE__INST3_SEG0 0
+#define GC_BASE__INST3_SEG1 0
+#define GC_BASE__INST3_SEG2 0
+#define GC_BASE__INST3_SEG3 0
+#define GC_BASE__INST3_SEG4 0
+#define GC_BASE__INST3_SEG5 0
+
+#define GC_BASE__INST4_SEG0 0
+#define GC_BASE__INST4_SEG1 0
+#define GC_BASE__INST4_SEG2 0
+#define GC_BASE__INST4_SEG3 0
+#define GC_BASE__INST4_SEG4 0
+#define GC_BASE__INST4_SEG5 0
+
+#define GC_BASE__INST5_SEG0 0
+#define GC_BASE__INST5_SEG1 0
+#define GC_BASE__INST5_SEG2 0
+#define GC_BASE__INST5_SEG3 0
+#define GC_BASE__INST5_SEG4 0
+#define GC_BASE__INST5_SEG5 0
+
+#define HDP_BASE__INST0_SEG0 0x00000F20
+#define HDP_BASE__INST0_SEG1 0
+#define HDP_BASE__INST0_SEG2 0
+#define HDP_BASE__INST0_SEG3 0
+#define HDP_BASE__INST0_SEG4 0
+#define HDP_BASE__INST0_SEG5 0
+
+#define HDP_BASE__INST1_SEG0 0
+#define HDP_BASE__INST1_SEG1 0
+#define HDP_BASE__INST1_SEG2 0
+#define HDP_BASE__INST1_SEG3 0
+#define HDP_BASE__INST1_SEG4 0
+#define HDP_BASE__INST1_SEG5 0
+
+#define HDP_BASE__INST2_SEG0 0
+#define HDP_BASE__INST2_SEG1 0
+#define HDP_BASE__INST2_SEG2 0
+#define HDP_BASE__INST2_SEG3 0
+#define HDP_BASE__INST2_SEG4 0
+#define HDP_BASE__INST2_SEG5 0
+
+#define HDP_BASE__INST3_SEG0 0
+#define HDP_BASE__INST3_SEG1 0
+#define HDP_BASE__INST3_SEG2 0
+#define HDP_BASE__INST3_SEG3 0
+#define HDP_BASE__INST3_SEG4 0
+#define HDP_BASE__INST3_SEG5 0
+
+#define HDP_BASE__INST4_SEG0 0
+#define HDP_BASE__INST4_SEG1 0
+#define HDP_BASE__INST4_SEG2 0
+#define HDP_BASE__INST4_SEG3 0
+#define HDP_BASE__INST4_SEG4 0
+#define HDP_BASE__INST4_SEG5 0
+
+#define HDP_BASE__INST5_SEG0 0
+#define HDP_BASE__INST5_SEG1 0
+#define HDP_BASE__INST5_SEG2 0
+#define HDP_BASE__INST5_SEG3 0
+#define HDP_BASE__INST5_SEG4 0
+#define HDP_BASE__INST5_SEG5 0
+
+#define MMHUB_BASE__INST0_SEG0 0x0001A000
+#define MMHUB_BASE__INST0_SEG1 0
+#define MMHUB_BASE__INST0_SEG2 0
+#define MMHUB_BASE__INST0_SEG3 0
+#define MMHUB_BASE__INST0_SEG4 0
+#define MMHUB_BASE__INST0_SEG5 0
+
+#define MMHUB_BASE__INST1_SEG0 0
+#define MMHUB_BASE__INST1_SEG1 0
+#define MMHUB_BASE__INST1_SEG2 0
+#define MMHUB_BASE__INST1_SEG3 0
+#define MMHUB_BASE__INST1_SEG4 0
+#define MMHUB_BASE__INST1_SEG5 0
+
+#define MMHUB_BASE__INST2_SEG0 0
+#define MMHUB_BASE__INST2_SEG1 0
+#define MMHUB_BASE__INST2_SEG2 0
+#define MMHUB_BASE__INST2_SEG3 0
+#define MMHUB_BASE__INST2_SEG4 0
+#define MMHUB_BASE__INST2_SEG5 0
+
+#define MMHUB_BASE__INST3_SEG0 0
+#define MMHUB_BASE__INST3_SEG1 0
+#define MMHUB_BASE__INST3_SEG2 0
+#define MMHUB_BASE__INST3_SEG3 0
+#define MMHUB_BASE__INST3_SEG4 0
+#define MMHUB_BASE__INST3_SEG5 0
+
+#define MMHUB_BASE__INST4_SEG0 0
+#define MMHUB_BASE__INST4_SEG1 0
+#define MMHUB_BASE__INST4_SEG2 0
+#define MMHUB_BASE__INST4_SEG3 0
+#define MMHUB_BASE__INST4_SEG4 0
+#define MMHUB_BASE__INST4_SEG5 0
+
+#define MMHUB_BASE__INST5_SEG0 0
+#define MMHUB_BASE__INST5_SEG1 0
+#define MMHUB_BASE__INST5_SEG2 0
+#define MMHUB_BASE__INST5_SEG3 0
+#define MMHUB_BASE__INST5_SEG4 0
+#define MMHUB_BASE__INST5_SEG5 0
+
+#define MP0_BASE__INST0_SEG0 0x00016000
+#define MP0_BASE__INST0_SEG1 0
+#define MP0_BASE__INST0_SEG2 0
+#define MP0_BASE__INST0_SEG3 0
+#define MP0_BASE__INST0_SEG4 0
+#define MP0_BASE__INST0_SEG5 0
+
+#define MP0_BASE__INST1_SEG0 0
+#define MP0_BASE__INST1_SEG1 0
+#define MP0_BASE__INST1_SEG2 0
+#define MP0_BASE__INST1_SEG3 0
+#define MP0_BASE__INST1_SEG4 0
+#define MP0_BASE__INST1_SEG5 0
+
+#define MP0_BASE__INST2_SEG0 0
+#define MP0_BASE__INST2_SEG1 0
+#define MP0_BASE__INST2_SEG2 0
+#define MP0_BASE__INST2_SEG3 0
+#define MP0_BASE__INST2_SEG4 0
+#define MP0_BASE__INST2_SEG5 0
+
+#define MP0_BASE__INST3_SEG0 0
+#define MP0_BASE__INST3_SEG1 0
+#define MP0_BASE__INST3_SEG2 0
+#define MP0_BASE__INST3_SEG3 0
+#define MP0_BASE__INST3_SEG4 0
+#define MP0_BASE__INST3_SEG5 0
+
+#define MP0_BASE__INST4_SEG0 0
+#define MP0_BASE__INST4_SEG1 0
+#define MP0_BASE__INST4_SEG2 0
+#define MP0_BASE__INST4_SEG3 0
+#define MP0_BASE__INST4_SEG4 0
+#define MP0_BASE__INST4_SEG5 0
+
+#define MP0_BASE__INST5_SEG0 0
+#define MP0_BASE__INST5_SEG1 0
+#define MP0_BASE__INST5_SEG2 0
+#define MP0_BASE__INST5_SEG3 0
+#define MP0_BASE__INST5_SEG4 0
+#define MP0_BASE__INST5_SEG5 0
+
+#define MP1_BASE__INST0_SEG0 0x00016000
+#define MP1_BASE__INST0_SEG1 0
+#define MP1_BASE__INST0_SEG2 0
+#define MP1_BASE__INST0_SEG3 0
+#define MP1_BASE__INST0_SEG4 0
+#define MP1_BASE__INST0_SEG5 0
+
+#define MP1_BASE__INST1_SEG0 0
+#define MP1_BASE__INST1_SEG1 0
+#define MP1_BASE__INST1_SEG2 0
+#define MP1_BASE__INST1_SEG3 0
+#define MP1_BASE__INST1_SEG4 0
+#define MP1_BASE__INST1_SEG5 0
+
+#define MP1_BASE__INST2_SEG0 0
+#define MP1_BASE__INST2_SEG1 0
+#define MP1_BASE__INST2_SEG2 0
+#define MP1_BASE__INST2_SEG3 0
+#define MP1_BASE__INST2_SEG4 0
+#define MP1_BASE__INST2_SEG5 0
+
+#define MP1_BASE__INST3_SEG0 0
+#define MP1_BASE__INST3_SEG1 0
+#define MP1_BASE__INST3_SEG2 0
+#define MP1_BASE__INST3_SEG3 0
+#define MP1_BASE__INST3_SEG4 0
+#define MP1_BASE__INST3_SEG5 0
+
+#define MP1_BASE__INST4_SEG0 0
+#define MP1_BASE__INST4_SEG1 0
+#define MP1_BASE__INST4_SEG2 0
+#define MP1_BASE__INST4_SEG3 0
+#define MP1_BASE__INST4_SEG4 0
+#define MP1_BASE__INST4_SEG5 0
+
+#define MP1_BASE__INST5_SEG0 0
+#define MP1_BASE__INST5_SEG1 0
+#define MP1_BASE__INST5_SEG2 0
+#define MP1_BASE__INST5_SEG3 0
+#define MP1_BASE__INST5_SEG4 0
+#define MP1_BASE__INST5_SEG5 0
+
+#define NBIO_BASE__INST0_SEG0 0x00000000
+#define NBIO_BASE__INST0_SEG1 0x00000014
+#define NBIO_BASE__INST0_SEG2 0x00000D20
+#define NBIO_BASE__INST0_SEG3 0x00010400
+#define NBIO_BASE__INST0_SEG4 0
+#define NBIO_BASE__INST0_SEG5 0
+
+#define NBIO_BASE__INST1_SEG0 0
+#define NBIO_BASE__INST1_SEG1 0
+#define NBIO_BASE__INST1_SEG2 0
+#define NBIO_BASE__INST1_SEG3 0
+#define NBIO_BASE__INST1_SEG4 0
+#define NBIO_BASE__INST1_SEG5 0
+
+#define NBIO_BASE__INST2_SEG0 0
+#define NBIO_BASE__INST2_SEG1 0
+#define NBIO_BASE__INST2_SEG2 0
+#define NBIO_BASE__INST2_SEG3 0
+#define NBIO_BASE__INST2_SEG4 0
+#define NBIO_BASE__INST2_SEG5 0
+
+#define NBIO_BASE__INST3_SEG0 0
+#define NBIO_BASE__INST3_SEG1 0
+#define NBIO_BASE__INST3_SEG2 0
+#define NBIO_BASE__INST3_SEG3 0
+#define NBIO_BASE__INST3_SEG4 0
+#define NBIO_BASE__INST3_SEG5 0
+
+#define NBIO_BASE__INST4_SEG0 0
+#define NBIO_BASE__INST4_SEG1 0
+#define NBIO_BASE__INST4_SEG2 0
+#define NBIO_BASE__INST4_SEG3 0
+#define NBIO_BASE__INST4_SEG4 0
+#define NBIO_BASE__INST4_SEG5 0
+
+#define NBIO_BASE__INST5_SEG0 0
+#define NBIO_BASE__INST5_SEG1 0
+#define NBIO_BASE__INST5_SEG2 0
+#define NBIO_BASE__INST5_SEG3 0
+#define NBIO_BASE__INST5_SEG4 0
+#define NBIO_BASE__INST5_SEG5 0
+
+#define OSSSYS_BASE__INST0_SEG0 0x000010A0
+#define OSSSYS_BASE__INST0_SEG1 0
+#define OSSSYS_BASE__INST0_SEG2 0
+#define OSSSYS_BASE__INST0_SEG3 0
+#define OSSSYS_BASE__INST0_SEG4 0
+#define OSSSYS_BASE__INST0_SEG5 0
+
+#define OSSSYS_BASE__INST1_SEG0 0
+#define OSSSYS_BASE__INST1_SEG1 0
+#define OSSSYS_BASE__INST1_SEG2 0
+#define OSSSYS_BASE__INST1_SEG3 0
+#define OSSSYS_BASE__INST1_SEG4 0
+#define OSSSYS_BASE__INST1_SEG5 0
+
+#define OSSSYS_BASE__INST2_SEG0 0
+#define OSSSYS_BASE__INST2_SEG1 0
+#define OSSSYS_BASE__INST2_SEG2 0
+#define OSSSYS_BASE__INST2_SEG3 0
+#define OSSSYS_BASE__INST2_SEG4 0
+#define OSSSYS_BASE__INST2_SEG5 0
+
+#define OSSSYS_BASE__INST3_SEG0 0
+#define OSSSYS_BASE__INST3_SEG1 0
+#define OSSSYS_BASE__INST3_SEG2 0
+#define OSSSYS_BASE__INST3_SEG3 0
+#define OSSSYS_BASE__INST3_SEG4 0
+#define OSSSYS_BASE__INST3_SEG5 0
+
+#define OSSSYS_BASE__INST4_SEG0 0
+#define OSSSYS_BASE__INST4_SEG1 0
+#define OSSSYS_BASE__INST4_SEG2 0
+#define OSSSYS_BASE__INST4_SEG3 0
+#define OSSSYS_BASE__INST4_SEG4 0
+#define OSSSYS_BASE__INST4_SEG5 0
+
+#define OSSSYS_BASE__INST5_SEG0 0
+#define OSSSYS_BASE__INST5_SEG1 0
+#define OSSSYS_BASE__INST5_SEG2 0
+#define OSSSYS_BASE__INST5_SEG3 0
+#define OSSSYS_BASE__INST5_SEG4 0
+#define OSSSYS_BASE__INST5_SEG5 0
+
+#define SDMA0_BASE__INST0_SEG0 0x00001260
+#define SDMA0_BASE__INST0_SEG1 0
+#define SDMA0_BASE__INST0_SEG2 0
+#define SDMA0_BASE__INST0_SEG3 0
+#define SDMA0_BASE__INST0_SEG4 0
+#define SDMA0_BASE__INST0_SEG5 0
+
+#define SDMA0_BASE__INST1_SEG0 0
+#define SDMA0_BASE__INST1_SEG1 0
+#define SDMA0_BASE__INST1_SEG2 0
+#define SDMA0_BASE__INST1_SEG3 0
+#define SDMA0_BASE__INST1_SEG4 0
+#define SDMA0_BASE__INST1_SEG5 0
+
+#define SDMA0_BASE__INST2_SEG0 0
+#define SDMA0_BASE__INST2_SEG1 0
+#define SDMA0_BASE__INST2_SEG2 0
+#define SDMA0_BASE__INST2_SEG3 0
+#define SDMA0_BASE__INST2_SEG4 0
+#define SDMA0_BASE__INST2_SEG5 0
+
+#define SDMA0_BASE__INST3_SEG0 0
+#define SDMA0_BASE__INST3_SEG1 0
+#define SDMA0_BASE__INST3_SEG2 0
+#define SDMA0_BASE__INST3_SEG3 0
+#define SDMA0_BASE__INST3_SEG4 0
+#define SDMA0_BASE__INST3_SEG5 0
+
+#define SDMA0_BASE__INST4_SEG0 0
+#define SDMA0_BASE__INST4_SEG1 0
+#define SDMA0_BASE__INST4_SEG2 0
+#define SDMA0_BASE__INST4_SEG3 0
+#define SDMA0_BASE__INST4_SEG4 0
+#define SDMA0_BASE__INST4_SEG5 0
+
+#define SDMA0_BASE__INST5_SEG0 0
+#define SDMA0_BASE__INST5_SEG1 0
+#define SDMA0_BASE__INST5_SEG2 0
+#define SDMA0_BASE__INST5_SEG3 0
+#define SDMA0_BASE__INST5_SEG4 0
+#define SDMA0_BASE__INST5_SEG5 0
+
+#define SDMA1_BASE__INST0_SEG0 0x00001860
+#define SDMA1_BASE__INST0_SEG1 0
+#define SDMA1_BASE__INST0_SEG2 0
+#define SDMA1_BASE__INST0_SEG3 0
+#define SDMA1_BASE__INST0_SEG4 0
+#define SDMA1_BASE__INST0_SEG5 0
+
+#define SDMA1_BASE__INST1_SEG0 0
+#define SDMA1_BASE__INST1_SEG1 0
+#define SDMA1_BASE__INST1_SEG2 0
+#define SDMA1_BASE__INST1_SEG3 0
+#define SDMA1_BASE__INST1_SEG4 0
+#define SDMA1_BASE__INST1_SEG5 0
+
+#define SDMA1_BASE__INST2_SEG0 0
+#define SDMA1_BASE__INST2_SEG1 0
+#define SDMA1_BASE__INST2_SEG2 0
+#define SDMA1_BASE__INST2_SEG3 0
+#define SDMA1_BASE__INST2_SEG4 0
+#define SDMA1_BASE__INST2_SEG5 0
+
+#define SDMA1_BASE__INST3_SEG0 0
+#define SDMA1_BASE__INST3_SEG1 0
+#define SDMA1_BASE__INST3_SEG2 0
+#define SDMA1_BASE__INST3_SEG3 0
+#define SDMA1_BASE__INST3_SEG4 0
+#define SDMA1_BASE__INST3_SEG5 0
+
+#define SDMA1_BASE__INST4_SEG0 0
+#define SDMA1_BASE__INST4_SEG1 0
+#define SDMA1_BASE__INST4_SEG2 0
+#define SDMA1_BASE__INST4_SEG3 0
+#define SDMA1_BASE__INST4_SEG4 0
+#define SDMA1_BASE__INST4_SEG5 0
+
+#define SDMA1_BASE__INST5_SEG0 0
+#define SDMA1_BASE__INST5_SEG1 0
+#define SDMA1_BASE__INST5_SEG2 0
+#define SDMA1_BASE__INST5_SEG3 0
+#define SDMA1_BASE__INST5_SEG4 0
+#define SDMA1_BASE__INST5_SEG5 0
+
+#define SMUIO_BASE__INST0_SEG0 0x00016800
+#define SMUIO_BASE__INST0_SEG1 0x00016A00
+#define SMUIO_BASE__INST0_SEG2 0
+#define SMUIO_BASE__INST0_SEG3 0
+#define SMUIO_BASE__INST0_SEG4 0
+#define SMUIO_BASE__INST0_SEG5 0
+
+#define SMUIO_BASE__INST1_SEG0 0
+#define SMUIO_BASE__INST1_SEG1 0
+#define SMUIO_BASE__INST1_SEG2 0
+#define SMUIO_BASE__INST1_SEG3 0
+#define SMUIO_BASE__INST1_SEG4 0
+#define SMUIO_BASE__INST1_SEG5 0
+
+#define SMUIO_BASE__INST2_SEG0 0
+#define SMUIO_BASE__INST2_SEG1 0
+#define SMUIO_BASE__INST2_SEG2 0
+#define SMUIO_BASE__INST2_SEG3 0
+#define SMUIO_BASE__INST2_SEG4 0
+#define SMUIO_BASE__INST2_SEG5 0
+
+#define SMUIO_BASE__INST3_SEG0 0
+#define SMUIO_BASE__INST3_SEG1 0
+#define SMUIO_BASE__INST3_SEG2 0
+#define SMUIO_BASE__INST3_SEG3 0
+#define SMUIO_BASE__INST3_SEG4 0
+#define SMUIO_BASE__INST3_SEG5 0
+
+#define SMUIO_BASE__INST4_SEG0 0
+#define SMUIO_BASE__INST4_SEG1 0
+#define SMUIO_BASE__INST4_SEG2 0
+#define SMUIO_BASE__INST4_SEG3 0
+#define SMUIO_BASE__INST4_SEG4 0
+#define SMUIO_BASE__INST4_SEG5 0
+
+#define SMUIO_BASE__INST5_SEG0 0
+#define SMUIO_BASE__INST5_SEG1 0
+#define SMUIO_BASE__INST5_SEG2 0
+#define SMUIO_BASE__INST5_SEG3 0
+#define SMUIO_BASE__INST5_SEG4 0
+#define SMUIO_BASE__INST5_SEG5 0
+
+#define THM_BASE__INST0_SEG0 0x00016600
+#define THM_BASE__INST0_SEG1 0
+#define THM_BASE__INST0_SEG2 0
+#define THM_BASE__INST0_SEG3 0
+#define THM_BASE__INST0_SEG4 0
+#define THM_BASE__INST0_SEG5 0
+
+#define THM_BASE__INST1_SEG0 0
+#define THM_BASE__INST1_SEG1 0
+#define THM_BASE__INST1_SEG2 0
+#define THM_BASE__INST1_SEG3 0
+#define THM_BASE__INST1_SEG4 0
+#define THM_BASE__INST1_SEG5 0
+
+#define THM_BASE__INST2_SEG0 0
+#define THM_BASE__INST2_SEG1 0
+#define THM_BASE__INST2_SEG2 0
+#define THM_BASE__INST2_SEG3 0
+#define THM_BASE__INST2_SEG4 0
+#define THM_BASE__INST2_SEG5 0
+
+#define THM_BASE__INST3_SEG0 0
+#define THM_BASE__INST3_SEG1 0
+#define THM_BASE__INST3_SEG2 0
+#define THM_BASE__INST3_SEG3 0
+#define THM_BASE__INST3_SEG4 0
+#define THM_BASE__INST3_SEG5 0
+
+#define THM_BASE__INST4_SEG0 0
+#define THM_BASE__INST4_SEG1 0
+#define THM_BASE__INST4_SEG2 0
+#define THM_BASE__INST4_SEG3 0
+#define THM_BASE__INST4_SEG4 0
+#define THM_BASE__INST4_SEG5 0
+
+#define THM_BASE__INST5_SEG0 0
+#define THM_BASE__INST5_SEG1 0
+#define THM_BASE__INST5_SEG2 0
+#define THM_BASE__INST5_SEG3 0
+#define THM_BASE__INST5_SEG4 0
+#define THM_BASE__INST5_SEG5 0
+
+#define UMC_BASE__INST0_SEG0 0x00014000
+#define UMC_BASE__INST0_SEG1 0
+#define UMC_BASE__INST0_SEG2 0
+#define UMC_BASE__INST0_SEG3 0
+#define UMC_BASE__INST0_SEG4 0
+#define UMC_BASE__INST0_SEG5 0
+
+#define UMC_BASE__INST1_SEG0 0
+#define UMC_BASE__INST1_SEG1 0
+#define UMC_BASE__INST1_SEG2 0
+#define UMC_BASE__INST1_SEG3 0
+#define UMC_BASE__INST1_SEG4 0
+#define UMC_BASE__INST1_SEG5 0
+
+#define UMC_BASE__INST2_SEG0 0
+#define UMC_BASE__INST2_SEG1 0
+#define UMC_BASE__INST2_SEG2 0
+#define UMC_BASE__INST2_SEG3 0
+#define UMC_BASE__INST2_SEG4 0
+#define UMC_BASE__INST2_SEG5 0
+
+#define UMC_BASE__INST3_SEG0 0
+#define UMC_BASE__INST3_SEG1 0
+#define UMC_BASE__INST3_SEG2 0
+#define UMC_BASE__INST3_SEG3 0
+#define UMC_BASE__INST3_SEG4 0
+#define UMC_BASE__INST3_SEG5 0
+
+#define UMC_BASE__INST4_SEG0 0
+#define UMC_BASE__INST4_SEG1 0
+#define UMC_BASE__INST4_SEG2 0
+#define UMC_BASE__INST4_SEG3 0
+#define UMC_BASE__INST4_SEG4 0
+#define UMC_BASE__INST4_SEG5 0
+
+#define UMC_BASE__INST5_SEG0 0
+#define UMC_BASE__INST5_SEG1 0
+#define UMC_BASE__INST5_SEG2 0
+#define UMC_BASE__INST5_SEG3 0
+#define UMC_BASE__INST5_SEG4 0
+#define UMC_BASE__INST5_SEG5 0
+
+#define UVD_BASE__INST0_SEG0 0x00007800
+#define UVD_BASE__INST0_SEG1 0x00007E00
+#define UVD_BASE__INST0_SEG2 0
+#define UVD_BASE__INST0_SEG3 0
+#define UVD_BASE__INST0_SEG4 0
+#define UVD_BASE__INST0_SEG5 0
+
+#define UVD_BASE__INST1_SEG0 0
+#define UVD_BASE__INST1_SEG1 0x00009000
+#define UVD_BASE__INST1_SEG2 0
+#define UVD_BASE__INST1_SEG3 0
+#define UVD_BASE__INST1_SEG4 0
+#define UVD_BASE__INST1_SEG5 0
+
+#define UVD_BASE__INST2_SEG0 0
+#define UVD_BASE__INST2_SEG1 0
+#define UVD_BASE__INST2_SEG2 0
+#define UVD_BASE__INST2_SEG3 0
+#define UVD_BASE__INST2_SEG4 0
+#define UVD_BASE__INST2_SEG5 0
+
+#define UVD_BASE__INST3_SEG0 0
+#define UVD_BASE__INST3_SEG1 0
+#define UVD_BASE__INST3_SEG2 0
+#define UVD_BASE__INST3_SEG3 0
+#define UVD_BASE__INST3_SEG4 0
+#define UVD_BASE__INST3_SEG5 0
+
+#define UVD_BASE__INST4_SEG0 0
+#define UVD_BASE__INST4_SEG1 0
+#define UVD_BASE__INST4_SEG2 0
+#define UVD_BASE__INST4_SEG3 0
+#define UVD_BASE__INST4_SEG4 0
+#define UVD_BASE__INST4_SEG5 0
+
+#define UVD_BASE__INST5_SEG0 0
+#define UVD_BASE__INST5_SEG1 0
+#define UVD_BASE__INST5_SEG2 0
+#define UVD_BASE__INST5_SEG3 0
+#define UVD_BASE__INST5_SEG4 0
+#define UVD_BASE__INST5_SEG5 0
+
+#define VCE_BASE__INST0_SEG0 0x00008800
+#define VCE_BASE__INST0_SEG1 0
+#define VCE_BASE__INST0_SEG2 0
+#define VCE_BASE__INST0_SEG3 0
+#define VCE_BASE__INST0_SEG4 0
+#define VCE_BASE__INST0_SEG5 0
+
+#define VCE_BASE__INST1_SEG0 0
+#define VCE_BASE__INST1_SEG1 0
+#define VCE_BASE__INST1_SEG2 0
+#define VCE_BASE__INST1_SEG3 0
+#define VCE_BASE__INST1_SEG4 0
+#define VCE_BASE__INST1_SEG5 0
+
+#define VCE_BASE__INST2_SEG0 0
+#define VCE_BASE__INST2_SEG1 0
+#define VCE_BASE__INST2_SEG2 0
+#define VCE_BASE__INST2_SEG3 0
+#define VCE_BASE__INST2_SEG4 0
+#define VCE_BASE__INST2_SEG5 0
+
+#define VCE_BASE__INST3_SEG0 0
+#define VCE_BASE__INST3_SEG1 0
+#define VCE_BASE__INST3_SEG2 0
+#define VCE_BASE__INST3_SEG3 0
+#define VCE_BASE__INST3_SEG4 0
+#define VCE_BASE__INST3_SEG5 0
+
+#define VCE_BASE__INST4_SEG0 0
+#define VCE_BASE__INST4_SEG1 0
+#define VCE_BASE__INST4_SEG2 0
+#define VCE_BASE__INST4_SEG3 0
+#define VCE_BASE__INST4_SEG4 0
+#define VCE_BASE__INST4_SEG5 0
+
+#define VCE_BASE__INST5_SEG0 0
+#define VCE_BASE__INST5_SEG1 0
+#define VCE_BASE__INST5_SEG2 0
+#define VCE_BASE__INST5_SEG3 0
+#define VCE_BASE__INST5_SEG4 0
+#define VCE_BASE__INST5_SEG5 0
+
+#define XDMA_BASE__INST0_SEG0 0x00003400
+#define XDMA_BASE__INST0_SEG1 0
+#define XDMA_BASE__INST0_SEG2 0
+#define XDMA_BASE__INST0_SEG3 0
+#define XDMA_BASE__INST0_SEG4 0
+#define XDMA_BASE__INST0_SEG5 0
+
+#define XDMA_BASE__INST1_SEG0 0
+#define XDMA_BASE__INST1_SEG1 0
+#define XDMA_BASE__INST1_SEG2 0
+#define XDMA_BASE__INST1_SEG3 0
+#define XDMA_BASE__INST1_SEG4 0
+#define XDMA_BASE__INST1_SEG5 0
+
+#define XDMA_BASE__INST2_SEG0 0
+#define XDMA_BASE__INST2_SEG1 0
+#define XDMA_BASE__INST2_SEG2 0
+#define XDMA_BASE__INST2_SEG3 0
+#define XDMA_BASE__INST2_SEG4 0
+#define XDMA_BASE__INST2_SEG5 0
+
+#define XDMA_BASE__INST3_SEG0 0
+#define XDMA_BASE__INST3_SEG1 0
+#define XDMA_BASE__INST3_SEG2 0
+#define XDMA_BASE__INST3_SEG3 0
+#define XDMA_BASE__INST3_SEG4 0
+#define XDMA_BASE__INST3_SEG5 0
+
+#define XDMA_BASE__INST4_SEG0 0
+#define XDMA_BASE__INST4_SEG1 0
+#define XDMA_BASE__INST4_SEG2 0
+#define XDMA_BASE__INST4_SEG3 0
+#define XDMA_BASE__INST4_SEG4 0
+#define XDMA_BASE__INST4_SEG5 0
+
+#define XDMA_BASE__INST5_SEG0 0
+#define XDMA_BASE__INST5_SEG1 0
+#define XDMA_BASE__INST5_SEG2 0
+#define XDMA_BASE__INST5_SEG3 0
+#define XDMA_BASE__INST5_SEG4 0
+#define XDMA_BASE__INST5_SEG5 0
+
+#define RSMU_BASE__INST0_SEG0 0x00012000
+#define RSMU_BASE__INST0_SEG1 0
+#define RSMU_BASE__INST0_SEG2 0
+#define RSMU_BASE__INST0_SEG3 0
+#define RSMU_BASE__INST0_SEG4 0
+#define RSMU_BASE__INST0_SEG5 0
+
+#define RSMU_BASE__INST1_SEG0 0
+#define RSMU_BASE__INST1_SEG1 0
+#define RSMU_BASE__INST1_SEG2 0
+#define RSMU_BASE__INST1_SEG3 0
+#define RSMU_BASE__INST1_SEG4 0
+#define RSMU_BASE__INST1_SEG5 0
+
+#define RSMU_BASE__INST2_SEG0 0
+#define RSMU_BASE__INST2_SEG1 0
+#define RSMU_BASE__INST2_SEG2 0
+#define RSMU_BASE__INST2_SEG3 0
+#define RSMU_BASE__INST2_SEG4 0
+#define RSMU_BASE__INST2_SEG5 0
+
+#define RSMU_BASE__INST3_SEG0 0
+#define RSMU_BASE__INST3_SEG1 0
+#define RSMU_BASE__INST3_SEG2 0
+#define RSMU_BASE__INST3_SEG3 0
+#define RSMU_BASE__INST3_SEG4 0
+#define RSMU_BASE__INST3_SEG5 0
+
+#define RSMU_BASE__INST4_SEG0 0
+#define RSMU_BASE__INST4_SEG1 0
+#define RSMU_BASE__INST4_SEG2 0
+#define RSMU_BASE__INST4_SEG3 0
+#define RSMU_BASE__INST4_SEG4 0
+#define RSMU_BASE__INST4_SEG5 0
+
+#define RSMU_BASE__INST5_SEG0 0
+#define RSMU_BASE__INST5_SEG1 0
+#define RSMU_BASE__INST5_SEG2 0
+#define RSMU_BASE__INST5_SEG3 0
+#define RSMU_BASE__INST5_SEG4 0
+#define RSMU_BASE__INST5_SEG5 0
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 7e8ad30d98e2..b493369e6d0f 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -25,30 +25,16 @@
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/slab.h>
+#include <linux/firmware.h>
#include "amd_shared.h"
#include "amd_powerplay.h"
#include "power_state.h"
#include "amdgpu.h"
#include "hwmgr.h"
-#define PP_DPM_DISABLED 0xCCCC
-
-static int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_task task_id,
- enum amd_pm_state_type *user_state);
static const struct amd_pm_funcs pp_dpm_funcs;
-static inline int pp_check(struct pp_hwmgr *hwmgr)
-{
- if (hwmgr == NULL || hwmgr->smumgr_funcs == NULL)
- return -EINVAL;
-
- if (hwmgr->pm_en == 0 || hwmgr->hwmgr_func == NULL)
- return PP_DPM_DISABLED;
-
- return 0;
-}
-
static int amd_powerplay_create(struct amdgpu_device *adev)
{
struct pp_hwmgr *hwmgr;
@@ -61,19 +47,21 @@ static int amd_powerplay_create(struct amdgpu_device *adev)
return -ENOMEM;
hwmgr->adev = adev;
- hwmgr->pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
+ hwmgr->not_vf = !amdgpu_sriov_vf(adev);
+ hwmgr->pm_en = (amdgpu_dpm && hwmgr->not_vf) ? true : false;
hwmgr->device = amdgpu_cgs_create_device(adev);
mutex_init(&hwmgr->smu_lock);
hwmgr->chip_family = adev->family;
hwmgr->chip_id = adev->asic_type;
- hwmgr->feature_mask = amdgpu_pp_feature_mask;
+ hwmgr->feature_mask = adev->powerplay.pp_feature;
+ hwmgr->display_config = &adev->pm.pm_display_cfg;
adev->powerplay.pp_handle = hwmgr;
adev->powerplay.pp_funcs = &pp_dpm_funcs;
return 0;
}
-static int amd_powerplay_destroy(struct amdgpu_device *adev)
+static void amd_powerplay_destroy(struct amdgpu_device *adev)
{
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
@@ -82,8 +70,6 @@ static int amd_powerplay_destroy(struct amdgpu_device *adev)
kfree(hwmgr);
hwmgr = NULL;
-
- return 0;
}
static int pp_early_init(void *handle)
@@ -109,18 +95,9 @@ static int pp_sw_init(void *handle)
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret >= 0) {
- if (hwmgr->smumgr_funcs->smu_init == NULL)
- return -EINVAL;
-
- ret = hwmgr->smumgr_funcs->smu_init(hwmgr);
+ ret = hwmgr_sw_init(hwmgr);
- phm_register_irq_handlers(hwmgr);
-
- pr_debug("amdgpu: powerplay sw initialized\n");
- }
+ pr_debug("powerplay sw init %s\n", ret ? "failed" : "successfully");
return ret;
}
@@ -129,16 +106,14 @@ static int pp_sw_fini(void *handle)
{
struct amdgpu_device *adev = handle;
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
- int ret = 0;
- ret = pp_check(hwmgr);
- if (ret >= 0) {
- if (hwmgr->smumgr_funcs->smu_fini != NULL)
- hwmgr->smumgr_funcs->smu_fini(hwmgr);
- }
+ hwmgr_sw_fini(hwmgr);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
+ release_firmware(adev->pm.fw);
+ adev->pm.fw = NULL;
amdgpu_ucode_fini_bo(adev);
+ }
return 0;
}
@@ -152,55 +127,76 @@ static int pp_hw_init(void *handle)
if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
amdgpu_ucode_init_bo(adev);
- ret = pp_check(hwmgr);
+ ret = hwmgr_hw_init(hwmgr);
- if (ret >= 0) {
- if (hwmgr->smumgr_funcs->start_smu == NULL)
- return -EINVAL;
+ if (ret)
+ pr_err("powerplay hw init failed\n");
- if (hwmgr->smumgr_funcs->start_smu(hwmgr)) {
- pr_err("smc start failed\n");
- hwmgr->smumgr_funcs->smu_fini(hwmgr);
- return -EINVAL;
- }
- if (ret == PP_DPM_DISABLED)
- goto exit;
- ret = hwmgr_hw_init(hwmgr);
- if (ret)
- goto exit;
- }
return ret;
-exit:
- hwmgr->pm_en = 0;
- cgs_notify_dpm_enabled(hwmgr->device, false);
- return 0;
-
}
static int pp_hw_fini(void *handle)
{
struct amdgpu_device *adev = handle;
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
- int ret = 0;
- ret = pp_check(hwmgr);
- if (ret == 0)
- hwmgr_hw_fini(hwmgr);
+ hwmgr_hw_fini(hwmgr);
return 0;
}
+static void pp_reserve_vram_for_smu(struct amdgpu_device *adev)
+{
+ int r = -EINVAL;
+ void *cpu_ptr = NULL;
+ uint64_t gpu_addr;
+ struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
+
+ if (amdgpu_bo_create_kernel(adev, adev->pm.smu_prv_buffer_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->pm.smu_prv_buffer,
+ &gpu_addr,
+ &cpu_ptr)) {
+ DRM_ERROR("amdgpu: failed to create smu prv buffer\n");
+ return;
+ }
+
+ if (hwmgr->hwmgr_func->notify_cac_buffer_info)
+ r = hwmgr->hwmgr_func->notify_cac_buffer_info(hwmgr,
+ lower_32_bits((unsigned long)cpu_ptr),
+ upper_32_bits((unsigned long)cpu_ptr),
+ lower_32_bits(gpu_addr),
+ upper_32_bits(gpu_addr),
+ adev->pm.smu_prv_buffer_size);
+
+ if (r) {
+ amdgpu_bo_free_kernel(&adev->pm.smu_prv_buffer, NULL, NULL);
+ adev->pm.smu_prv_buffer = NULL;
+ DRM_ERROR("amdgpu: failed to notify SMU buffer address\n");
+ }
+}
+
static int pp_late_init(void *handle)
{
struct amdgpu_device *adev = handle;
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
- int ret = 0;
-
- ret = pp_check(hwmgr);
+ int ret;
- if (ret == 0)
- pp_dpm_dispatch_tasks(hwmgr,
+ if (hwmgr && hwmgr->pm_en) {
+ mutex_lock(&hwmgr->smu_lock);
+ hwmgr_handle_task(hwmgr,
AMD_PP_TASK_COMPLETE_INIT, NULL);
+ mutex_unlock(&hwmgr->smu_lock);
+ }
+ if (adev->pm.smu_prv_buffer_size != 0)
+ pp_reserve_vram_for_smu(adev);
+
+ if (hwmgr->hwmgr_func->gfx_off_control &&
+ (hwmgr->feature_mask & PP_GFXOFF_MASK)) {
+ ret = hwmgr->hwmgr_func->gfx_off_control(hwmgr, true);
+ if (ret)
+ pr_err("gfx off enabling failed!\n");
+ }
return 0;
}
@@ -209,6 +205,8 @@ static void pp_late_fini(void *handle)
{
struct amdgpu_device *adev = handle;
+ if (adev->pm.smu_prv_buffer)
+ amdgpu_bo_free_kernel(&adev->pm.smu_prv_buffer, NULL, NULL);
amd_powerplay_destroy(adev);
}
@@ -233,12 +231,18 @@ static int pp_set_powergating_state(void *handle,
{
struct amdgpu_device *adev = handle;
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
- int ret = 0;
+ int ret;
- ret = pp_check(hwmgr);
+ if (!hwmgr || !hwmgr->pm_en)
+ return 0;
- if (ret)
- return ret;
+ if (hwmgr->hwmgr_func->gfx_off_control) {
+ /* Enable/disable GFX off through SMU */
+ ret = hwmgr->hwmgr_func->gfx_off_control(hwmgr,
+ state == AMD_PG_STATE_GATE);
+ if (ret)
+ pr_err("gfx off control failed!\n");
+ }
if (hwmgr->hwmgr_func->enable_per_cu_power_gating == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -254,38 +258,16 @@ static int pp_suspend(void *handle)
{
struct amdgpu_device *adev = handle;
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
- int ret = 0;
- ret = pp_check(hwmgr);
- if (ret == 0)
- hwmgr_hw_suspend(hwmgr);
- return 0;
+ return hwmgr_suspend(hwmgr);
}
static int pp_resume(void *handle)
{
struct amdgpu_device *adev = handle;
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
- int ret;
-
- ret = pp_check(hwmgr);
-
- if (ret < 0)
- return ret;
-
- if (hwmgr->smumgr_funcs->start_smu == NULL)
- return -EINVAL;
-
- if (hwmgr->smumgr_funcs->start_smu(hwmgr)) {
- pr_err("smc start failed\n");
- hwmgr->smumgr_funcs->smu_fini(hwmgr);
- return -EINVAL;
- }
-
- if (ret == PP_DPM_DISABLED)
- return 0;
- return hwmgr_hw_resume(hwmgr);
+ return hwmgr_resume(hwmgr);
}
static int pp_set_clockgating_state(void *handle,
@@ -334,12 +316,9 @@ static int pp_dpm_fw_loading_complete(void *handle)
static int pp_set_clockgating_by_smu(void *handle, uint32_t msg_id)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
-
- ret = pp_check(hwmgr);
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->update_clock_gatings == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -362,10 +341,10 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr *hwmgr,
if (*level & profile_mode_mask) {
hwmgr->saved_dpm_level = hwmgr->dpm_level;
hwmgr->en_umd_pstate = true;
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_CG_STATE_UNGATE);
- cgs_set_powergating_state(hwmgr->device,
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_UNGATE);
}
@@ -375,10 +354,10 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr *hwmgr,
if (*level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)
*level = hwmgr->saved_dpm_level;
hwmgr->en_umd_pstate = false;
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_CG_STATE_GATE);
- cgs_set_powergating_state(hwmgr->device,
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_GATE);
}
@@ -389,12 +368,9 @@ static int pp_dpm_force_performance_level(void *handle,
enum amd_dpm_forced_level level)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (level == hwmgr->dpm_level)
return 0;
@@ -412,13 +388,10 @@ static enum amd_dpm_forced_level pp_dpm_get_performance_level(
void *handle)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
enum amd_dpm_forced_level level;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
level = hwmgr->dpm_level;
@@ -429,13 +402,10 @@ static enum amd_dpm_forced_level pp_dpm_get_performance_level(
static uint32_t pp_dpm_get_sclk(void *handle, bool low)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
uint32_t clk = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return 0;
if (hwmgr->hwmgr_func->get_sclk == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -450,13 +420,10 @@ static uint32_t pp_dpm_get_sclk(void *handle, bool low)
static uint32_t pp_dpm_get_mclk(void *handle, bool low)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
uint32_t clk = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return 0;
if (hwmgr->hwmgr_func->get_mclk == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -471,11 +438,8 @@ static uint32_t pp_dpm_get_mclk(void *handle, bool low)
static void pp_dpm_powergate_vce(void *handle, bool gate)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
+ if (!hwmgr || !hwmgr->pm_en)
return;
if (hwmgr->hwmgr_func->powergate_vce == NULL) {
@@ -490,11 +454,8 @@ static void pp_dpm_powergate_vce(void *handle, bool gate)
static void pp_dpm_powergate_uvd(void *handle, bool gate)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
-
- ret = pp_check(hwmgr);
- if (ret)
+ if (!hwmgr || !hwmgr->pm_en)
return;
if (hwmgr->hwmgr_func->powergate_uvd == NULL) {
@@ -512,10 +473,8 @@ static int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_task task_id,
int ret = 0;
struct pp_hwmgr *hwmgr = handle;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
ret = hwmgr_handle_task(hwmgr, task_id, user_state);
@@ -528,15 +487,9 @@ static enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle)
{
struct pp_hwmgr *hwmgr = handle;
struct pp_power_state *state;
- int ret = 0;
enum amd_pm_state_type pm_type;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
-
- if (hwmgr->current_ps == NULL)
+ if (!hwmgr || !hwmgr->pm_en || !hwmgr->current_ps)
return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -568,11 +521,8 @@ static enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle)
static void pp_dpm_set_fan_control_mode(void *handle, uint32_t mode)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
-
- ret = pp_check(hwmgr);
- if (ret)
+ if (!hwmgr || !hwmgr->pm_en)
return;
if (hwmgr->hwmgr_func->set_fan_control_mode == NULL) {
@@ -587,13 +537,10 @@ static void pp_dpm_set_fan_control_mode(void *handle, uint32_t mode)
static uint32_t pp_dpm_get_fan_control_mode(void *handle)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
uint32_t mode = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return 0;
if (hwmgr->hwmgr_func->get_fan_control_mode == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -610,10 +557,8 @@ static int pp_dpm_set_fan_speed_percent(void *handle, uint32_t percent)
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->set_fan_speed_percent == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -630,10 +575,8 @@ static int pp_dpm_get_fan_speed_percent(void *handle, uint32_t *speed)
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->get_fan_speed_percent == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -651,10 +594,8 @@ static int pp_dpm_get_fan_speed_rpm(void *handle, uint32_t *rpm)
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->get_fan_speed_rpm == NULL)
return -EINVAL;
@@ -670,16 +611,10 @@ static int pp_dpm_get_pp_num_states(void *handle,
{
struct pp_hwmgr *hwmgr = handle;
int i;
- int ret = 0;
memset(data, 0, sizeof(*data));
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
-
- if (hwmgr->ps == NULL)
+ if (!hwmgr || !hwmgr->pm_en ||!hwmgr->ps)
return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -713,15 +648,9 @@ static int pp_dpm_get_pp_num_states(void *handle,
static int pp_dpm_get_pp_table(void *handle, char **table)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
int size = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
-
- if (!hwmgr->soft_pp_table)
+ if (!hwmgr || !hwmgr->pm_en ||!hwmgr->soft_pp_table)
return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -736,10 +665,6 @@ static int amd_powerplay_reset(void *handle)
struct pp_hwmgr *hwmgr = handle;
int ret;
- ret = pp_check(hwmgr);
- if (ret)
- return ret;
-
ret = hwmgr_hw_fini(hwmgr);
if (ret)
return ret;
@@ -754,40 +679,38 @@ static int amd_powerplay_reset(void *handle)
static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
+ int ret = -ENOMEM;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
if (!hwmgr->hardcode_pp_table) {
hwmgr->hardcode_pp_table = kmemdup(hwmgr->soft_pp_table,
hwmgr->soft_pp_table_size,
GFP_KERNEL);
- if (!hwmgr->hardcode_pp_table) {
- mutex_unlock(&hwmgr->smu_lock);
- return -ENOMEM;
- }
+ if (!hwmgr->hardcode_pp_table)
+ goto err;
}
memcpy(hwmgr->hardcode_pp_table, buf, size);
hwmgr->soft_pp_table = hwmgr->hardcode_pp_table;
- mutex_unlock(&hwmgr->smu_lock);
ret = amd_powerplay_reset(handle);
if (ret)
- return ret;
+ goto err;
if (hwmgr->hwmgr_func->avfs_control) {
ret = hwmgr->hwmgr_func->avfs_control(hwmgr, false);
if (ret)
- return ret;
+ goto err;
}
-
+ mutex_unlock(&hwmgr->smu_lock);
return 0;
+err:
+ mutex_unlock(&hwmgr->smu_lock);
+ return ret;
}
static int pp_dpm_force_clock_level(void *handle,
@@ -796,10 +719,8 @@ static int pp_dpm_force_clock_level(void *handle,
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->force_clock_level == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -820,10 +741,8 @@ static int pp_dpm_print_clock_levels(void *handle,
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->print_clock_levels == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -840,10 +759,8 @@ static int pp_dpm_get_sclk_od(void *handle)
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->get_sclk_od == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -860,10 +777,8 @@ static int pp_dpm_set_sclk_od(void *handle, uint32_t value)
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->set_sclk_od == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -881,10 +796,8 @@ static int pp_dpm_get_mclk_od(void *handle)
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->get_mclk_od == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -901,10 +814,8 @@ static int pp_dpm_set_mclk_od(void *handle, uint32_t value)
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->set_mclk_od == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -922,11 +833,7 @@ static int pp_dpm_read_sensor(void *handle, int idx,
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
- if (ret)
- return ret;
-
- if (value == NULL)
+ if (!hwmgr || !hwmgr->pm_en || !value)
return -EINVAL;
switch (idx) {
@@ -948,14 +855,11 @@ static struct amd_vce_state*
pp_dpm_get_vce_clock_state(void *handle, unsigned idx)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
-
- ret = pp_check(hwmgr);
- if (ret)
+ if (!hwmgr || !hwmgr->pm_en)
return NULL;
- if (hwmgr && idx < hwmgr->num_vce_state_tables)
+ if (idx < hwmgr->num_vce_state_tables)
return &hwmgr->vce_states[idx];
return NULL;
}
@@ -964,7 +868,7 @@ static int pp_get_power_profile_mode(void *handle, char *buf)
{
struct pp_hwmgr *hwmgr = handle;
- if (!buf || pp_check(hwmgr))
+ if (!hwmgr || !hwmgr->pm_en || !buf)
return -EINVAL;
if (hwmgr->hwmgr_func->get_power_profile_mode == NULL) {
@@ -980,12 +884,12 @@ static int pp_set_power_profile_mode(void *handle, long *input, uint32_t size)
struct pp_hwmgr *hwmgr = handle;
int ret = -EINVAL;
- if (pp_check(hwmgr))
- return -EINVAL;
+ if (!hwmgr || !hwmgr->pm_en)
+ return ret;
if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) {
pr_info("%s was not implemented.\n", __func__);
- return -EINVAL;
+ return ret;
}
mutex_lock(&hwmgr->smu_lock);
if (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL)
@@ -998,7 +902,7 @@ static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint3
{
struct pp_hwmgr *hwmgr = handle;
- if (pp_check(hwmgr))
+ if (!hwmgr || !hwmgr->pm_en)
return -EINVAL;
if (hwmgr->hwmgr_func->odn_edit_dpm_table == NULL) {
@@ -1016,7 +920,7 @@ static int pp_dpm_switch_power_profile(void *handle,
long workload;
uint32_t index;
- if (pp_check(hwmgr))
+ if (!hwmgr || !hwmgr->pm_en)
return -EINVAL;
if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) {
@@ -1048,46 +952,12 @@ static int pp_dpm_switch_power_profile(void *handle,
return 0;
}
-static int pp_dpm_notify_smu_memory_info(void *handle,
- uint32_t virtual_addr_low,
- uint32_t virtual_addr_hi,
- uint32_t mc_addr_low,
- uint32_t mc_addr_hi,
- uint32_t size)
-{
- struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
-
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
-
- if (hwmgr->hwmgr_func->notify_cac_buffer_info == NULL) {
- pr_info("%s was not implemented.\n", __func__);
- return -EINVAL;
- }
-
- mutex_lock(&hwmgr->smu_lock);
-
- ret = hwmgr->hwmgr_func->notify_cac_buffer_info(hwmgr, virtual_addr_low,
- virtual_addr_hi, mc_addr_low, mc_addr_hi,
- size);
-
- mutex_unlock(&hwmgr->smu_lock);
-
- return ret;
-}
-
static int pp_set_power_limit(void *handle, uint32_t limit)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->set_power_limit == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -1104,20 +974,14 @@ static int pp_set_power_limit(void *handle, uint32_t limit)
hwmgr->hwmgr_func->set_power_limit(hwmgr, limit);
hwmgr->power_limit = limit;
mutex_unlock(&hwmgr->smu_lock);
- return ret;
+ return 0;
}
static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
-
- ret = pp_check(hwmgr);
- if (ret)
- return ret;
-
- if (limit == NULL)
+ if (!hwmgr || !hwmgr->pm_en ||!limit)
return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -1129,19 +993,16 @@ static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit)
mutex_unlock(&hwmgr->smu_lock);
- return ret;
+ return 0;
}
static int pp_display_configuration_change(void *handle,
const struct amd_pp_display_configuration *display_config)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
phm_store_dal_configuration_data(hwmgr, display_config);
@@ -1155,12 +1016,7 @@ static int pp_get_display_power_level(void *handle,
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
-
- if (output == NULL)
+ if (!hwmgr || !hwmgr->pm_en ||!output)
return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -1177,10 +1033,8 @@ static int pp_get_current_clocks(void *handle,
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -1225,10 +1079,8 @@ static int pp_get_clock_by_type(void *handle, enum amd_pp_clock_type type, struc
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (clocks == NULL)
return -EINVAL;
@@ -1246,11 +1098,7 @@ static int pp_get_clock_by_type_with_latency(void *handle,
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
- if (ret)
- return ret;
-
- if (!clocks)
+ if (!hwmgr || !hwmgr->pm_en ||!clocks)
return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -1266,11 +1114,7 @@ static int pp_get_clock_by_type_with_voltage(void *handle,
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
- if (ret)
- return ret;
-
- if (!clocks)
+ if (!hwmgr || !hwmgr->pm_en ||!clocks)
return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -1287,11 +1131,7 @@ static int pp_set_watermarks_for_clocks_ranges(void *handle,
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
- if (ret)
- return ret;
-
- if (!wm_with_clock_ranges)
+ if (!hwmgr || !hwmgr->pm_en ||!wm_with_clock_ranges)
return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -1308,11 +1148,7 @@ static int pp_display_clock_voltage_request(void *handle,
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
- if (ret)
- return ret;
-
- if (!clock)
+ if (!hwmgr || !hwmgr->pm_en ||!clock)
return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -1328,12 +1164,7 @@ static int pp_get_display_mode_validation_clocks(void *handle,
struct pp_hwmgr *hwmgr = handle;
int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
-
- if (clocks == NULL)
+ if (!hwmgr || !hwmgr->pm_en ||!clocks)
return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -1348,12 +1179,9 @@ static int pp_get_display_mode_validation_clocks(void *handle,
static int pp_set_mmhub_powergating_by_smu(void *handle)
{
struct pp_hwmgr *hwmgr = handle;
- int ret = 0;
- ret = pp_check(hwmgr);
-
- if (ret)
- return ret;
+ if (!hwmgr || !hwmgr->pm_en)
+ return -EINVAL;
if (hwmgr->hwmgr_func->set_mmhub_powergating_by_smu == NULL) {
pr_info("%s was not implemented.\n", __func__);
@@ -1390,7 +1218,6 @@ static const struct amd_pm_funcs pp_dpm_funcs = {
.get_vce_clock_state = pp_dpm_get_vce_clock_state,
.switch_power_profile = pp_dpm_switch_power_profile,
.set_clockgating_by_smu = pp_set_clockgating_by_smu,
- .notify_smu_memory_info = pp_dpm_notify_smu_memory_info,
.get_power_profile_mode = pp_get_power_profile_mode,
.set_power_profile_mode = pp_set_power_profile_mode,
.odn_edit_dpm_table = pp_odn_edit_dpm_table,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index ae2e9339dd6b..a0bb921fac22 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -75,8 +75,7 @@ int phm_set_power_state(struct pp_hwmgr *hwmgr,
int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr)
{
- int ret = 1;
- bool enabled;
+ int ret = -EINVAL;;
PHM_FUNC_CHECK(hwmgr);
if (smum_is_dpm_running(hwmgr)) {
@@ -87,17 +86,12 @@ int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr)
if (NULL != hwmgr->hwmgr_func->dynamic_state_management_enable)
ret = hwmgr->hwmgr_func->dynamic_state_management_enable(hwmgr);
- enabled = ret == 0;
-
- cgs_notify_dpm_enabled(hwmgr->device, enabled);
-
return ret;
}
int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr)
{
- int ret = -1;
- bool enabled;
+ int ret = -EINVAL;
PHM_FUNC_CHECK(hwmgr);
@@ -109,10 +103,6 @@ int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr)
if (hwmgr->hwmgr_func->dynamic_state_management_disable)
ret = hwmgr->hwmgr_func->dynamic_state_management_disable(hwmgr);
- enabled = ret == 0 ? false : true;
-
- cgs_notify_dpm_enabled(hwmgr->device, enabled);
-
return ret;
}
@@ -142,6 +132,15 @@ int phm_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
return 0;
}
+int phm_apply_clock_adjust_rules(struct pp_hwmgr *hwmgr)
+{
+ PHM_FUNC_CHECK(hwmgr);
+
+ if (hwmgr->hwmgr_func->apply_clocks_adjust_rules != NULL)
+ return hwmgr->hwmgr_func->apply_clocks_adjust_rules(hwmgr);
+ return 0;
+}
+
int phm_powerdown_uvd(struct pp_hwmgr *hwmgr)
{
PHM_FUNC_CHECK(hwmgr);
@@ -171,6 +170,16 @@ int phm_disable_clock_power_gatings(struct pp_hwmgr *hwmgr)
return 0;
}
+int phm_pre_display_configuration_changed(struct pp_hwmgr *hwmgr)
+{
+ PHM_FUNC_CHECK(hwmgr);
+
+ if (NULL != hwmgr->hwmgr_func->pre_display_config_changed)
+ hwmgr->hwmgr_func->pre_display_config_changed(hwmgr);
+
+ return 0;
+
+}
int phm_display_configuration_changed(struct pp_hwmgr *hwmgr)
{
@@ -275,13 +284,11 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
if (display_config == NULL)
return -EINVAL;
- hwmgr->display_config = *display_config;
-
if (NULL != hwmgr->hwmgr_func->set_deep_sleep_dcefclk)
- hwmgr->hwmgr_func->set_deep_sleep_dcefclk(hwmgr, hwmgr->display_config.min_dcef_deep_sleep_set_clk);
+ hwmgr->hwmgr_func->set_deep_sleep_dcefclk(hwmgr, display_config->min_dcef_deep_sleep_set_clk);
- for (index = 0; index < hwmgr->display_config.num_path_including_non_display; index++) {
- if (hwmgr->display_config.displays[index].controller_id != 0)
+ for (index = 0; index < display_config->num_path_including_non_display; index++) {
+ if (display_config->displays[index].controller_id != 0)
number_of_active_display++;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index 42982055b161..e63bc47dc715 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -40,6 +40,7 @@ extern const struct pp_smumgr_func iceland_smu_funcs;
extern const struct pp_smumgr_func tonga_smu_funcs;
extern const struct pp_smumgr_func fiji_smu_funcs;
extern const struct pp_smumgr_func polaris10_smu_funcs;
+extern const struct pp_smumgr_func vegam_smu_funcs;
extern const struct pp_smumgr_func vega10_smu_funcs;
extern const struct pp_smumgr_func vega12_smu_funcs;
extern const struct pp_smumgr_func smu10_smu_funcs;
@@ -76,7 +77,7 @@ static void hwmgr_init_workload_prority(struct pp_hwmgr *hwmgr)
int hwmgr_early_init(struct pp_hwmgr *hwmgr)
{
- if (hwmgr == NULL)
+ if (!hwmgr)
return -EINVAL;
hwmgr->usec_timeout = AMD_MAX_USEC_TIMEOUT;
@@ -95,7 +96,8 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
hwmgr->smumgr_funcs = &ci_smu_funcs;
ci_set_asic_special_caps(hwmgr);
hwmgr->feature_mask &= ~(PP_VBI_TIME_SUPPORT_MASK |
- PP_ENABLE_GFX_CG_THRU_SMU);
+ PP_ENABLE_GFX_CG_THRU_SMU |
+ PP_GFXOFF_MASK);
hwmgr->pp_table_version = PP_TABLE_V0;
hwmgr->od_enabled = false;
smu7_init_function_pointers(hwmgr);
@@ -103,9 +105,11 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
case AMDGPU_FAMILY_CZ:
hwmgr->od_enabled = false;
hwmgr->smumgr_funcs = &smu8_smu_funcs;
+ hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
smu8_init_function_pointers(hwmgr);
break;
case AMDGPU_FAMILY_VI:
+ hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
switch (hwmgr->chip_id) {
case CHIP_TOPAZ:
hwmgr->smumgr_funcs = &iceland_smu_funcs;
@@ -133,14 +137,21 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
polaris_set_asic_special_caps(hwmgr);
hwmgr->feature_mask &= ~(PP_UVD_HANDSHAKE_MASK);
break;
+ case CHIP_VEGAM:
+ hwmgr->smumgr_funcs = &vegam_smu_funcs;
+ polaris_set_asic_special_caps(hwmgr);
+ hwmgr->feature_mask &= ~(PP_UVD_HANDSHAKE_MASK);
+ break;
default:
return -EINVAL;
}
smu7_init_function_pointers(hwmgr);
break;
case AMDGPU_FAMILY_AI:
+ hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
switch (hwmgr->chip_id) {
case CHIP_VEGA10:
+ case CHIP_VEGA20:
hwmgr->smumgr_funcs = &vega10_smu_funcs;
vega10_hwmgr_init(hwmgr);
break;
@@ -170,22 +181,58 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
return 0;
}
+int hwmgr_sw_init(struct pp_hwmgr *hwmgr)
+{
+ if (!hwmgr|| !hwmgr->smumgr_funcs || !hwmgr->smumgr_funcs->smu_init)
+ return -EINVAL;
+
+ phm_register_irq_handlers(hwmgr);
+
+ return hwmgr->smumgr_funcs->smu_init(hwmgr);
+}
+
+
+int hwmgr_sw_fini(struct pp_hwmgr *hwmgr)
+{
+ if (hwmgr && hwmgr->smumgr_funcs && hwmgr->smumgr_funcs->smu_fini)
+ hwmgr->smumgr_funcs->smu_fini(hwmgr);
+
+ return 0;
+}
+
int hwmgr_hw_init(struct pp_hwmgr *hwmgr)
{
int ret = 0;
- if (hwmgr == NULL)
+ if (!hwmgr || !hwmgr->smumgr_funcs)
return -EINVAL;
- if (hwmgr->pptable_func == NULL ||
- hwmgr->pptable_func->pptable_init == NULL ||
- hwmgr->hwmgr_func->backend_init == NULL)
- return -EINVAL;
+ if (hwmgr->smumgr_funcs->start_smu) {
+ ret = hwmgr->smumgr_funcs->start_smu(hwmgr);
+ if (ret) {
+ pr_err("smc start failed\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!hwmgr->pm_en)
+ return 0;
+
+ if (!hwmgr->pptable_func ||
+ !hwmgr->pptable_func->pptable_init ||
+ !hwmgr->hwmgr_func->backend_init) {
+ hwmgr->pm_en = false;
+ pr_info("dpm not supported \n");
+ return 0;
+ }
ret = hwmgr->pptable_func->pptable_init(hwmgr);
if (ret)
goto err;
+ ((struct amdgpu_device *)hwmgr->adev)->pm.no_fan =
+ hwmgr->thermal_controller.fanInfo.bNoFan;
+
ret = hwmgr->hwmgr_func->backend_init(hwmgr);
if (ret)
goto err1;
@@ -206,6 +253,8 @@ int hwmgr_hw_init(struct pp_hwmgr *hwmgr)
if (ret)
goto err2;
+ ((struct amdgpu_device *)hwmgr->adev)->pm.dpm_enabled = true;
+
return 0;
err2:
if (hwmgr->hwmgr_func->backend_fini)
@@ -214,14 +263,13 @@ err1:
if (hwmgr->pptable_func->pptable_fini)
hwmgr->pptable_func->pptable_fini(hwmgr);
err:
- pr_err("amdgpu: powerplay initialization failed\n");
return ret;
}
int hwmgr_hw_fini(struct pp_hwmgr *hwmgr)
{
- if (hwmgr == NULL)
- return -EINVAL;
+ if (!hwmgr || !hwmgr->pm_en)
+ return 0;
phm_stop_thermal_controller(hwmgr);
psm_set_boot_states(hwmgr);
@@ -236,12 +284,12 @@ int hwmgr_hw_fini(struct pp_hwmgr *hwmgr)
return psm_fini_power_state_table(hwmgr);
}
-int hwmgr_hw_suspend(struct pp_hwmgr *hwmgr)
+int hwmgr_suspend(struct pp_hwmgr *hwmgr)
{
int ret = 0;
- if (hwmgr == NULL)
- return -EINVAL;
+ if (!hwmgr || !hwmgr->pm_en)
+ return 0;
phm_disable_smc_firmware_ctf(hwmgr);
ret = psm_set_boot_states(hwmgr);
@@ -255,13 +303,23 @@ int hwmgr_hw_suspend(struct pp_hwmgr *hwmgr)
return ret;
}
-int hwmgr_hw_resume(struct pp_hwmgr *hwmgr)
+int hwmgr_resume(struct pp_hwmgr *hwmgr)
{
int ret = 0;
- if (hwmgr == NULL)
+ if (!hwmgr)
return -EINVAL;
+ if (hwmgr->smumgr_funcs && hwmgr->smumgr_funcs->start_smu) {
+ if (hwmgr->smumgr_funcs->start_smu(hwmgr)) {
+ pr_err("smc start failed\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!hwmgr->pm_en)
+ return 0;
+
ret = phm_setup_asic(hwmgr);
if (ret)
return ret;
@@ -270,9 +328,6 @@ int hwmgr_hw_resume(struct pp_hwmgr *hwmgr)
if (ret)
return ret;
ret = phm_start_thermal_controller(hwmgr);
- if (ret)
- return ret;
-
ret |= psm_set_performance_states(hwmgr);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
index 0f2851b5b368..0af13c154328 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
@@ -46,7 +46,7 @@ int psm_init_power_state_table(struct pp_hwmgr *hwmgr)
sizeof(struct pp_power_state);
if (table_entries == 0 || size == 0) {
- pr_warn("Please check whether power state management is suppported on this asic\n");
+ pr_warn("Please check whether power state management is supported on this asic\n");
return 0;
}
@@ -265,6 +265,15 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip,
if (skip)
return 0;
+ if (!hwmgr->ps)
+ /*
+ * for vega12/vega20 which does not support power state manager
+ * DAL clock limits should also be honoured
+ */
+ phm_apply_clock_adjust_rules(hwmgr);
+
+ phm_pre_display_configuration_changed(hwmgr);
+
phm_display_configuration_changed(hwmgr);
if (hwmgr->ps)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
index c6febbf0bf69..7047e29755c3 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
@@ -23,7 +23,8 @@
#include "pp_debug.h"
#include <linux/module.h>
#include <linux/slab.h>
-
+#include <linux/delay.h>
+#include "atom.h"
#include "ppatomctrl.h"
#include "atombios.h"
#include "cgs_common.h"
@@ -128,7 +129,6 @@ static int atomctrl_set_mc_reg_address_table(
return 0;
}
-
int atomctrl_initialize_mc_reg_table(
struct pp_hwmgr *hwmgr,
uint8_t module_index,
@@ -141,7 +141,7 @@ int atomctrl_initialize_mc_reg_table(
u16 size;
vram_info = (ATOM_VRAM_INFO_HEADER_V2_1 *)
- cgs_atom_get_data_table(hwmgr->device,
+ smu_atom_get_data_table(hwmgr->adev,
GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev);
if (module_index >= vram_info->ucNumOfVRAMModule) {
@@ -174,6 +174,8 @@ int atomctrl_set_engine_dram_timings_rv770(
uint32_t engine_clock,
uint32_t memory_clock)
{
+ struct amdgpu_device *adev = hwmgr->adev;
+
SET_ENGINE_CLOCK_PS_ALLOCATION engine_clock_parameters;
/* They are both in 10KHz Units. */
@@ -184,9 +186,10 @@ int atomctrl_set_engine_dram_timings_rv770(
/* in 10 khz units.*/
engine_clock_parameters.sReserved.ulClock =
cpu_to_le32(memory_clock & SET_CLOCK_FREQ_MASK);
- return cgs_atom_exec_cmd_table(hwmgr->device,
+
+ return amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings),
- &engine_clock_parameters);
+ (uint32_t *)&engine_clock_parameters);
}
/**
@@ -203,7 +206,7 @@ static ATOM_VOLTAGE_OBJECT_INFO *get_voltage_info_table(void *device)
union voltage_object_info *voltage_info;
voltage_info = (union voltage_object_info *)
- cgs_atom_get_data_table(device, index,
+ smu_atom_get_data_table(device, index,
&size, &frev, &crev);
if (voltage_info != NULL)
@@ -247,16 +250,16 @@ int atomctrl_get_memory_pll_dividers_si(
pp_atomctrl_memory_clock_param *mpll_param,
bool strobe_mode)
{
+ struct amdgpu_device *adev = hwmgr->adev;
COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1 mpll_parameters;
int result;
mpll_parameters.ulClock = cpu_to_le32(clock_value);
mpll_parameters.ucInputFlag = (uint8_t)((strobe_mode) ? 1 : 0);
- result = cgs_atom_exec_cmd_table
- (hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam),
- &mpll_parameters);
+ (uint32_t *)&mpll_parameters);
if (0 == result) {
mpll_param->mpll_fb_divider.clk_frac =
@@ -295,14 +298,15 @@ int atomctrl_get_memory_pll_dividers_si(
int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr,
uint32_t clock_value, pp_atomctrl_memory_clock_param *mpll_param)
{
+ struct amdgpu_device *adev = hwmgr->adev;
COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2 mpll_parameters;
int result;
mpll_parameters.ulClock.ulClock = cpu_to_le32(clock_value);
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam),
- &mpll_parameters);
+ (uint32_t *)&mpll_parameters);
if (!result)
mpll_param->mpll_post_divider =
@@ -311,19 +315,49 @@ int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr,
return result;
}
+int atomctrl_get_memory_pll_dividers_ai(struct pp_hwmgr *hwmgr,
+ uint32_t clock_value,
+ pp_atomctrl_memory_clock_param_ai *mpll_param)
+{
+ struct amdgpu_device *adev = hwmgr->adev;
+ COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3 mpll_parameters = {{0}, 0, 0};
+ int result;
+
+ mpll_parameters.ulClock.ulClock = cpu_to_le32(clock_value);
+
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam),
+ (uint32_t *)&mpll_parameters);
+
+ /* VEGAM's mpll takes sometime to finish computing */
+ udelay(10);
+
+ if (!result) {
+ mpll_param->ulMclk_fcw_int =
+ le16_to_cpu(mpll_parameters.usMclk_fcw_int);
+ mpll_param->ulMclk_fcw_frac =
+ le16_to_cpu(mpll_parameters.usMclk_fcw_frac);
+ mpll_param->ulClock =
+ le32_to_cpu(mpll_parameters.ulClock.ulClock);
+ mpll_param->ulPostDiv = mpll_parameters.ulClock.ucPostDiv;
+ }
+
+ return result;
+}
+
int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr,
uint32_t clock_value,
pp_atomctrl_clock_dividers_kong *dividers)
{
+ struct amdgpu_device *adev = hwmgr->adev;
COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 pll_parameters;
int result;
pll_parameters.ulClock = cpu_to_le32(clock_value);
- result = cgs_atom_exec_cmd_table
- (hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL),
- &pll_parameters);
+ (uint32_t *)&pll_parameters);
if (0 == result) {
dividers->pll_post_divider = pll_parameters.ucPostDiv;
@@ -338,16 +372,16 @@ int atomctrl_get_engine_pll_dividers_vi(
uint32_t clock_value,
pp_atomctrl_clock_dividers_vi *dividers)
{
+ struct amdgpu_device *adev = hwmgr->adev;
COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters;
int result;
pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value);
pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_SCLK;
- result = cgs_atom_exec_cmd_table
- (hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL),
- &pll_patameters);
+ (uint32_t *)&pll_patameters);
if (0 == result) {
dividers->pll_post_divider =
@@ -375,16 +409,16 @@ int atomctrl_get_engine_pll_dividers_ai(struct pp_hwmgr *hwmgr,
uint32_t clock_value,
pp_atomctrl_clock_dividers_ai *dividers)
{
+ struct amdgpu_device *adev = hwmgr->adev;
COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_7 pll_patameters;
int result;
pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value);
pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_SCLK;
- result = cgs_atom_exec_cmd_table
- (hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL),
- &pll_patameters);
+ (uint32_t *)&pll_patameters);
if (0 == result) {
dividers->usSclk_fcw_frac = le16_to_cpu(pll_patameters.usSclk_fcw_frac);
@@ -407,6 +441,7 @@ int atomctrl_get_dfs_pll_dividers_vi(
uint32_t clock_value,
pp_atomctrl_clock_dividers_vi *dividers)
{
+ struct amdgpu_device *adev = hwmgr->adev;
COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters;
int result;
@@ -414,10 +449,9 @@ int atomctrl_get_dfs_pll_dividers_vi(
pll_patameters.ulClock.ucPostDiv =
COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK;
- result = cgs_atom_exec_cmd_table
- (hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL),
- &pll_patameters);
+ (uint32_t *)&pll_patameters);
if (0 == result) {
dividers->pll_post_divider =
@@ -452,7 +486,7 @@ uint32_t atomctrl_get_reference_clock(struct pp_hwmgr *hwmgr)
uint32_t clock;
fw_info = (ATOM_FIRMWARE_INFO *)
- cgs_atom_get_data_table(hwmgr->device,
+ smu_atom_get_data_table(hwmgr->adev,
GetIndexIntoMasterTable(DATA, FirmwareInfo),
&size, &frev, &crev);
@@ -476,7 +510,7 @@ bool atomctrl_is_voltage_controlled_by_gpio_v3(
uint8_t voltage_mode)
{
ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info =
- (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device);
+ (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev);
bool ret;
PP_ASSERT_WITH_CODE((NULL != voltage_info),
@@ -495,7 +529,7 @@ int atomctrl_get_voltage_table_v3(
pp_atomctrl_voltage_table *voltage_table)
{
ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info =
- (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device);
+ (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev);
const ATOM_VOLTAGE_OBJECT_V3 *voltage_object;
unsigned int i;
@@ -572,7 +606,7 @@ static ATOM_GPIO_PIN_LUT *get_gpio_lookup_table(void *device)
void *table_address;
table_address = (ATOM_GPIO_PIN_LUT *)
- cgs_atom_get_data_table(device,
+ smu_atom_get_data_table(device,
GetIndexIntoMasterTable(DATA, GPIO_Pin_LUT),
&size, &frev, &crev);
@@ -592,7 +626,7 @@ bool atomctrl_get_pp_assign_pin(
{
bool bRet = false;
ATOM_GPIO_PIN_LUT *gpio_lookup_table =
- get_gpio_lookup_table(hwmgr->device);
+ get_gpio_lookup_table(hwmgr->adev);
PP_ASSERT_WITH_CODE((NULL != gpio_lookup_table),
"Could not find GPIO lookup Table in BIOS.", return false);
@@ -613,7 +647,7 @@ int atomctrl_calculate_voltage_evv_on_sclk(
bool debug)
{
ATOM_ASIC_PROFILING_INFO_V3_4 *getASICProfilingInfo;
-
+ struct amdgpu_device *adev = hwmgr->adev;
EFUSE_LINEAR_FUNC_PARAM sRO_fuse;
EFUSE_LINEAR_FUNC_PARAM sCACm_fuse;
EFUSE_LINEAR_FUNC_PARAM sCACb_fuse;
@@ -640,7 +674,7 @@ int atomctrl_calculate_voltage_evv_on_sclk(
int result;
getASICProfilingInfo = (ATOM_ASIC_PROFILING_INFO_V3_4 *)
- cgs_atom_get_data_table(hwmgr->device,
+ smu_atom_get_data_table(hwmgr->adev,
GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
NULL, NULL, NULL);
@@ -706,9 +740,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
sOutput_FuseValues.sEfuse = sInput_FuseValues;
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
- &sOutput_FuseValues);
+ (uint32_t *)&sOutput_FuseValues);
if (result)
return result;
@@ -727,9 +761,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
sOutput_FuseValues.sEfuse = sInput_FuseValues;
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
- &sOutput_FuseValues);
+ (uint32_t *)&sOutput_FuseValues);
if (result)
return result;
@@ -747,9 +781,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
sInput_FuseValues.ucBitLength = sCACb_fuse.ucEfuseLength;
sOutput_FuseValues.sEfuse = sInput_FuseValues;
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
- &sOutput_FuseValues);
+ (uint32_t *)&sOutput_FuseValues);
if (result)
return result;
@@ -768,9 +802,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
sOutput_FuseValues.sEfuse = sInput_FuseValues;
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
- &sOutput_FuseValues);
+ (uint32_t *)&sOutput_FuseValues);
if (result)
return result;
@@ -790,9 +824,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
sOutput_FuseValues.sEfuse = sInput_FuseValues;
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
- &sOutput_FuseValues);
+ (uint32_t *)&sOutput_FuseValues);
if (result)
return result;
@@ -811,9 +845,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
sInput_FuseValues.ucBitLength = sKv_b_fuse.ucEfuseLength;
sOutput_FuseValues.sEfuse = sInput_FuseValues;
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
- &sOutput_FuseValues);
+ (uint32_t *)&sOutput_FuseValues);
if (result)
return result;
@@ -842,9 +876,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
sOutput_FuseValues.sEfuse = sInput_FuseValues;
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
- &sOutput_FuseValues);
+ (uint32_t *)&sOutput_FuseValues);
if (result)
return result;
@@ -1053,8 +1087,9 @@ int atomctrl_get_voltage_evv_on_sclk(
uint32_t sclk, uint16_t virtual_voltage_Id,
uint16_t *voltage)
{
- int result;
+ struct amdgpu_device *adev = hwmgr->adev;
GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space;
+ int result;
get_voltage_info_param_space.ucVoltageType =
voltage_type;
@@ -1065,14 +1100,12 @@ int atomctrl_get_voltage_evv_on_sclk(
get_voltage_info_param_space.ulSCLKFreq =
cpu_to_le32(sclk);
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
- &get_voltage_info_param_space);
-
- if (0 != result)
- return result;
+ (uint32_t *)&get_voltage_info_param_space);
- *voltage = le16_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *)
+ *voltage = result ? 0 :
+ le16_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *)
(&get_voltage_info_param_space))->usVoltageLevel);
return result;
@@ -1088,9 +1121,10 @@ int atomctrl_get_voltage_evv(struct pp_hwmgr *hwmgr,
uint16_t virtual_voltage_id,
uint16_t *voltage)
{
+ struct amdgpu_device *adev = hwmgr->adev;
+ GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space;
int result;
int entry_id;
- GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space;
/* search for leakage voltage ID 0xff01 ~ 0xff08 and sckl */
for (entry_id = 0; entry_id < hwmgr->dyn_state.vddc_dependency_on_sclk->count; entry_id++) {
@@ -1111,9 +1145,9 @@ int atomctrl_get_voltage_evv(struct pp_hwmgr *hwmgr,
get_voltage_info_param_space.ulSCLKFreq =
cpu_to_le32(hwmgr->dyn_state.vddc_dependency_on_sclk->entries[entry_id].clk);
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
- &get_voltage_info_param_space);
+ (uint32_t *)&get_voltage_info_param_space);
if (0 != result)
return result;
@@ -1135,7 +1169,7 @@ uint32_t atomctrl_get_mpll_reference_clock(struct pp_hwmgr *hwmgr)
u16 size;
fw_info = (ATOM_COMMON_TABLE_HEADER *)
- cgs_atom_get_data_table(hwmgr->device,
+ smu_atom_get_data_table(hwmgr->adev,
GetIndexIntoMasterTable(DATA, FirmwareInfo),
&size, &frev, &crev);
@@ -1167,7 +1201,7 @@ static ATOM_ASIC_INTERNAL_SS_INFO *asic_internal_ss_get_ss_table(void *device)
u16 size;
table = (ATOM_ASIC_INTERNAL_SS_INFO *)
- cgs_atom_get_data_table(device,
+ smu_atom_get_data_table(device,
GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info),
&size, &frev, &crev);
@@ -1188,7 +1222,7 @@ static int asic_internal_ss_get_ss_asignment(struct pp_hwmgr *hwmgr,
memset(ssEntry, 0x00, sizeof(pp_atomctrl_internal_ss_info));
- table = asic_internal_ss_get_ss_table(hwmgr->device);
+ table = asic_internal_ss_get_ss_table(hwmgr->adev);
if (NULL == table)
return -1;
@@ -1260,9 +1294,10 @@ int atomctrl_get_engine_clock_spread_spectrum(
ASIC_INTERNAL_ENGINE_SS, engine_clock, ssInfo);
}
-int atomctrl_read_efuse(void *device, uint16_t start_index,
+int atomctrl_read_efuse(struct pp_hwmgr *hwmgr, uint16_t start_index,
uint16_t end_index, uint32_t mask, uint32_t *efuse)
{
+ struct amdgpu_device *adev = hwmgr->adev;
int result;
READ_EFUSE_VALUE_PARAMETER efuse_param;
@@ -1272,11 +1307,10 @@ int atomctrl_read_efuse(void *device, uint16_t start_index,
efuse_param.sEfuse.ucBitLength = (uint8_t)
((end_index - start_index) + 1);
- result = cgs_atom_exec_cmd_table(device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
- &efuse_param);
- if (!result)
- *efuse = le32_to_cpu(efuse_param.ulEfuseValue) & mask;
+ (uint32_t *)&efuse_param);
+ *efuse = result ? 0 : le32_to_cpu(efuse_param.ulEfuseValue) & mask;
return result;
}
@@ -1284,6 +1318,7 @@ int atomctrl_read_efuse(void *device, uint16_t start_index,
int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
uint8_t level)
{
+ struct amdgpu_device *adev = hwmgr->adev;
DYNAMICE_MEMORY_SETTINGS_PARAMETER_V2_1 memory_clock_parameters;
int result;
@@ -1293,10 +1328,9 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
ADJUST_MC_SETTING_PARAM;
memory_clock_parameters.asDPMMCReg.ucMclkDPMState = level;
- result = cgs_atom_exec_cmd_table
- (hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings),
- &memory_clock_parameters);
+ (uint32_t *)&memory_clock_parameters);
return result;
}
@@ -1304,7 +1338,7 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage)
{
-
+ struct amdgpu_device *adev = hwmgr->adev;
int result;
GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_3 get_voltage_info_param_space;
@@ -1313,15 +1347,12 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_
get_voltage_info_param_space.usVoltageLevel = cpu_to_le16(virtual_voltage_Id);
get_voltage_info_param_space.ulSCLKFreq = cpu_to_le32(sclk);
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
- &get_voltage_info_param_space);
+ (uint32_t *)&get_voltage_info_param_space);
- if (0 != result)
- return result;
-
- *voltage = le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)
- (&get_voltage_info_param_space))->ulVoltageLevel);
+ *voltage = result ? 0 :
+ le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)(&get_voltage_info_param_space))->ulVoltageLevel);
return result;
}
@@ -1334,7 +1365,7 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr
u16 size;
ATOM_SMU_INFO_V2_1 *psmu_info =
- (ATOM_SMU_INFO_V2_1 *)cgs_atom_get_data_table(hwmgr->device,
+ (ATOM_SMU_INFO_V2_1 *)smu_atom_get_data_table(hwmgr->adev,
GetIndexIntoMasterTable(DATA, SMU_Info),
&size, &frev, &crev);
@@ -1362,7 +1393,7 @@ int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
return -EINVAL;
profile = (ATOM_ASIC_PROFILING_INFO_V3_6 *)
- cgs_atom_get_data_table(hwmgr->device,
+ smu_atom_get_data_table(hwmgr->adev,
GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
NULL, NULL, NULL);
if (!profile)
@@ -1402,7 +1433,7 @@ int atomctrl_get_svi2_info(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
uint16_t *load_line)
{
ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info =
- (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device);
+ (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev);
const ATOM_VOLTAGE_OBJECT_V3 *voltage_object;
@@ -1421,16 +1452,17 @@ int atomctrl_get_svi2_info(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
int atomctrl_get_leakage_id_from_efuse(struct pp_hwmgr *hwmgr, uint16_t *virtual_voltage_id)
{
- int result;
+ struct amdgpu_device *adev = hwmgr->adev;
SET_VOLTAGE_PS_ALLOCATION allocation;
SET_VOLTAGE_PARAMETERS_V1_3 *voltage_parameters =
(SET_VOLTAGE_PARAMETERS_V1_3 *)&allocation.sASICSetVoltage;
+ int result;
voltage_parameters->ucVoltageMode = ATOM_GET_LEAKAGE_ID;
- result = cgs_atom_exec_cmd_table(hwmgr->device,
+ result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
GetIndexIntoMasterTable(COMMAND, SetVoltage),
- voltage_parameters);
+ (uint32_t *)voltage_parameters);
*virtual_voltage_id = voltage_parameters->usVoltageLevel;
@@ -1453,7 +1485,7 @@ int atomctrl_get_leakage_vddc_base_on_leakage(struct pp_hwmgr *hwmgr,
ix = GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo);
profile = (ATOM_ASIC_PROFILING_INFO_V2_1 *)
- cgs_atom_get_data_table(hwmgr->device,
+ smu_atom_get_data_table(hwmgr->adev,
ix,
NULL, NULL, NULL);
if (!profile)
@@ -1498,3 +1530,33 @@ int atomctrl_get_leakage_vddc_base_on_leakage(struct pp_hwmgr *hwmgr,
return 0;
}
+
+void atomctrl_get_voltage_range(struct pp_hwmgr *hwmgr, uint32_t *max_vddc,
+ uint32_t *min_vddc)
+{
+ void *profile;
+
+ profile = smu_atom_get_data_table(hwmgr->adev,
+ GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
+ NULL, NULL, NULL);
+
+ if (profile) {
+ switch (hwmgr->chip_id) {
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ *max_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_3 *)profile)->ulMaxVddc/4);
+ *min_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_3 *)profile)->ulMinVddc/4);
+ return;
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS12:
+ *max_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_6 *)profile)->ulMaxVddc/100);
+ *min_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_6 *)profile)->ulMinVddc/100);
+ return;
+ default:
+ break;
+ }
+ }
+ *max_vddc = 0;
+ *min_vddc = 0;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
index c44a92064cf1..3ee54f182943 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
@@ -146,6 +146,14 @@ struct pp_atomctrl_memory_clock_param {
};
typedef struct pp_atomctrl_memory_clock_param pp_atomctrl_memory_clock_param;
+struct pp_atomctrl_memory_clock_param_ai {
+ uint32_t ulClock;
+ uint32_t ulPostDiv;
+ uint16_t ulMclk_fcw_frac;
+ uint16_t ulMclk_fcw_int;
+};
+typedef struct pp_atomctrl_memory_clock_param_ai pp_atomctrl_memory_clock_param_ai;
+
struct pp_atomctrl_internal_ss_info {
uint32_t speed_spectrum_percentage; /* in 1/100 percentage */
uint32_t speed_spectrum_rate; /* in KHz */
@@ -295,10 +303,12 @@ extern bool atomctrl_is_voltage_controlled_by_gpio_v3(struct pp_hwmgr *hwmgr, ui
extern int atomctrl_get_voltage_table_v3(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint8_t voltage_mode, pp_atomctrl_voltage_table *voltage_table);
extern int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr,
uint32_t clock_value, pp_atomctrl_memory_clock_param *mpll_param);
+extern int atomctrl_get_memory_pll_dividers_ai(struct pp_hwmgr *hwmgr,
+ uint32_t clock_value, pp_atomctrl_memory_clock_param_ai *mpll_param);
extern int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr,
uint32_t clock_value,
pp_atomctrl_clock_dividers_kong *dividers);
-extern int atomctrl_read_efuse(void *device, uint16_t start_index,
+extern int atomctrl_read_efuse(struct pp_hwmgr *hwmgr, uint16_t start_index,
uint16_t end_index, uint32_t mask, uint32_t *efuse);
extern int atomctrl_calculate_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage, uint16_t dpm_level, bool debug);
@@ -320,5 +330,8 @@ extern int atomctrl_get_leakage_vddc_base_on_leakage(struct pp_hwmgr *hwmgr,
uint16_t virtual_voltage_id,
uint16_t efuse_voltage_id);
extern int atomctrl_get_leakage_id_from_efuse(struct pp_hwmgr *hwmgr, uint16_t *virtual_voltage_id);
+
+extern void atomctrl_get_voltage_range(struct pp_hwmgr *hwmgr, uint32_t *max_vddc,
+ uint32_t *min_vddc);
#endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
index ad42caac033e..c97b0e5ba43b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
@@ -23,9 +23,9 @@
#include "ppatomfwctrl.h"
#include "atomfirmware.h"
+#include "atom.h"
#include "pp_debug.h"
-
static const union atom_voltage_object_v4 *pp_atomfwctrl_lookup_voltage_type_v4(
const struct atom_voltage_objects_info_v4_1 *voltage_object_info_table,
uint8_t voltage_type, uint8_t voltage_mode)
@@ -38,35 +38,34 @@ static const union atom_voltage_object_v4 *pp_atomfwctrl_lookup_voltage_type_v4(
while (offset < size) {
const union atom_voltage_object_v4 *voltage_object =
- (const union atom_voltage_object_v4 *)(start + offset);
+ (const union atom_voltage_object_v4 *)(start + offset);
- if (voltage_type == voltage_object->gpio_voltage_obj.header.voltage_type &&
- voltage_mode == voltage_object->gpio_voltage_obj.header.voltage_mode)
- return voltage_object;
+ if (voltage_type == voltage_object->gpio_voltage_obj.header.voltage_type &&
+ voltage_mode == voltage_object->gpio_voltage_obj.header.voltage_mode)
+ return voltage_object;
- offset += le16_to_cpu(voltage_object->gpio_voltage_obj.header.object_size);
+ offset += le16_to_cpu(voltage_object->gpio_voltage_obj.header.object_size);
- }
+ }
- return NULL;
+ return NULL;
}
static struct atom_voltage_objects_info_v4_1 *pp_atomfwctrl_get_voltage_info_table(
struct pp_hwmgr *hwmgr)
{
- const void *table_address;
- uint16_t idx;
+ const void *table_address;
+ uint16_t idx;
- idx = GetIndexIntoMasterDataTable(voltageobject_info);
- table_address = cgs_atom_get_data_table(hwmgr->device,
- idx, NULL, NULL, NULL);
+ idx = GetIndexIntoMasterDataTable(voltageobject_info);
+ table_address = smu_atom_get_data_table(hwmgr->adev,
+ idx, NULL, NULL, NULL);
- PP_ASSERT_WITH_CODE(
- table_address,
- "Error retrieving BIOS Table Address!",
- return NULL);
+ PP_ASSERT_WITH_CODE(table_address,
+ "Error retrieving BIOS Table Address!",
+ return NULL);
- return (struct atom_voltage_objects_info_v4_1 *)table_address;
+ return (struct atom_voltage_objects_info_v4_1 *)table_address;
}
/**
@@ -167,7 +166,7 @@ static struct atom_gpio_pin_lut_v2_1 *pp_atomfwctrl_get_gpio_lookup_table(
uint16_t idx;
idx = GetIndexIntoMasterDataTable(gpio_pin_lut);
- table_address = cgs_atom_get_data_table(hwmgr->device,
+ table_address = smu_atom_get_data_table(hwmgr->adev,
idx, NULL, NULL, NULL);
PP_ASSERT_WITH_CODE(table_address,
"Error retrieving BIOS Table Address!",
@@ -248,28 +247,30 @@ int pp_atomfwctrl_get_gpu_pll_dividers_vega10(struct pp_hwmgr *hwmgr,
uint32_t clock_type, uint32_t clock_value,
struct pp_atomfwctrl_clock_dividers_soc15 *dividers)
{
+ struct amdgpu_device *adev = hwmgr->adev;
struct compute_gpu_clock_input_parameter_v1_8 pll_parameters;
struct compute_gpu_clock_output_parameter_v1_8 *pll_output;
- int result;
uint32_t idx;
pll_parameters.gpuclock_10khz = (uint32_t)clock_value;
pll_parameters.gpu_clock_type = clock_type;
idx = GetIndexIntoMasterCmdTable(computegpuclockparam);
- result = cgs_atom_exec_cmd_table(hwmgr->device, idx, &pll_parameters);
-
- if (!result) {
- pll_output = (struct compute_gpu_clock_output_parameter_v1_8 *)
- &pll_parameters;
- dividers->ulClock = le32_to_cpu(pll_output->gpuclock_10khz);
- dividers->ulDid = le32_to_cpu(pll_output->dfs_did);
- dividers->ulPll_fb_mult = le32_to_cpu(pll_output->pll_fb_mult);
- dividers->ulPll_ss_fbsmult = le32_to_cpu(pll_output->pll_ss_fbsmult);
- dividers->usPll_ss_slew_frac = le16_to_cpu(pll_output->pll_ss_slew_frac);
- dividers->ucPll_ss_enable = pll_output->pll_ss_enable;
- }
- return result;
+
+ if (amdgpu_atom_execute_table(
+ adev->mode_info.atom_context, idx, (uint32_t *)&pll_parameters))
+ return -EINVAL;
+
+ pll_output = (struct compute_gpu_clock_output_parameter_v1_8 *)
+ &pll_parameters;
+ dividers->ulClock = le32_to_cpu(pll_output->gpuclock_10khz);
+ dividers->ulDid = le32_to_cpu(pll_output->dfs_did);
+ dividers->ulPll_fb_mult = le32_to_cpu(pll_output->pll_fb_mult);
+ dividers->ulPll_ss_fbsmult = le32_to_cpu(pll_output->pll_ss_fbsmult);
+ dividers->usPll_ss_slew_frac = le16_to_cpu(pll_output->pll_ss_slew_frac);
+ dividers->ucPll_ss_enable = pll_output->pll_ss_enable;
+
+ return 0;
}
int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
@@ -283,7 +284,7 @@ int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
idx = GetIndexIntoMasterDataTable(asic_profiling_info);
profile = (struct atom_asic_profiling_info_v4_1 *)
- cgs_atom_get_data_table(hwmgr->device,
+ smu_atom_get_data_table(hwmgr->adev,
idx, NULL, NULL, NULL);
if (!profile)
@@ -467,7 +468,7 @@ int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr,
idx = GetIndexIntoMasterDataTable(smu_info);
info = (struct atom_smu_info_v3_1 *)
- cgs_atom_get_data_table(hwmgr->device,
+ smu_atom_get_data_table(hwmgr->adev,
idx, NULL, NULL, NULL);
if (!info) {
@@ -487,8 +488,9 @@ int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr,
return 0;
}
-int pp_atomfwctrl__get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLKID id, uint32_t *frequency)
+int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLKID id, uint32_t *frequency)
{
+ struct amdgpu_device *adev = hwmgr->adev;
struct atom_get_smu_clock_info_parameters_v3_1 parameters;
struct atom_get_smu_clock_info_output_parameters_v3_1 *output;
uint32_t ix;
@@ -497,13 +499,13 @@ int pp_atomfwctrl__get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLK
parameters.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ;
ix = GetIndexIntoMasterCmdTable(getsmuclockinfo);
- if (!cgs_atom_exec_cmd_table(hwmgr->device, ix, &parameters)) {
- output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&parameters;
- *frequency = output->atom_smu_outputclkfreq.smu_clock_freq_hz / 10000;
- } else {
- pr_info("Error execute_table getsmuclockinfo!");
- return -1;
- }
+
+ if (amdgpu_atom_execute_table(
+ adev->mode_info.atom_context, ix, (uint32_t *)&parameters))
+ return -EINVAL;
+
+ output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&parameters;
+ *frequency = output->atom_smu_outputclkfreq.smu_clock_freq_hz / 10000;
return 0;
}
@@ -513,11 +515,10 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
{
struct atom_firmware_info_v3_1 *info = NULL;
uint16_t ix;
- uint32_t frequency = 0;
ix = GetIndexIntoMasterDataTable(firmwareinfo);
info = (struct atom_firmware_info_v3_1 *)
- cgs_atom_get_data_table(hwmgr->device,
+ smu_atom_get_data_table(hwmgr->adev,
ix, NULL, NULL, NULL);
if (!info) {
@@ -536,12 +537,6 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
boot_values->ulSocClk = 0;
boot_values->ulDCEFClk = 0;
- if (!pp_atomfwctrl__get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_SOCCLK_ID, &frequency))
- boot_values->ulSocClk = frequency;
-
- if (!pp_atomfwctrl__get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCEFCLK_ID, &frequency))
- boot_values->ulDCEFClk = frequency;
-
return 0;
}
@@ -553,7 +548,7 @@ int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr,
ix = GetIndexIntoMasterDataTable(smc_dpm_info);
info = (struct atom_smc_dpm_info_v4_1 *)
- cgs_atom_get_data_table(hwmgr->device,
+ smu_atom_get_data_table(hwmgr->adev,
ix, NULL, NULL, NULL);
if (!info) {
pr_info("Error retrieving BIOS Table Address!");
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
index 8df1e84f27c9..fe10aa4db5e6 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
@@ -230,6 +230,8 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
struct pp_atomfwctrl_bios_boot_up_values *boot_values);
int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr,
struct pp_atomfwctrl_smc_dpm_parameters *param);
+int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr,
+ BIOS_CLKID id, uint32_t *frequency);
#endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
index c9eecce5683f..f0d48b183d22 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
@@ -141,7 +141,7 @@ static const void *get_powerplay_table(struct pp_hwmgr *hwmgr)
if (!table_address) {
table_address = (ATOM_Tonga_POWERPLAYTABLE *)
- cgs_atom_get_data_table(hwmgr->device,
+ smu_atom_get_data_table(hwmgr->adev,
index, &size, &frev, &crev);
hwmgr->soft_pp_table = table_address; /*Cache the result in RAM.*/
hwmgr->soft_pp_table_size = size;
@@ -728,6 +728,32 @@ static int get_mm_clock_voltage_table(
return 0;
}
+static int get_gpio_table(struct pp_hwmgr *hwmgr,
+ struct phm_ppt_v1_gpio_table **pp_tonga_gpio_table,
+ const ATOM_Tonga_GPIO_Table *atom_gpio_table)
+{
+ uint32_t table_size;
+ struct phm_ppt_v1_gpio_table *pp_gpio_table;
+ struct phm_ppt_v1_information *pp_table_information =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+
+ table_size = sizeof(struct phm_ppt_v1_gpio_table);
+ pp_gpio_table = kzalloc(table_size, GFP_KERNEL);
+ if (!pp_gpio_table)
+ return -ENOMEM;
+
+ if (pp_table_information->vdd_dep_on_sclk->count <
+ atom_gpio_table->ucVRHotTriggeredSclkDpmIndex)
+ PP_ASSERT_WITH_CODE(false,
+ "SCLK DPM index for VRHot cannot exceed the total sclk level count!",);
+ else
+ pp_gpio_table->vrhot_triggered_sclk_dpm_index =
+ atom_gpio_table->ucVRHotTriggeredSclkDpmIndex;
+
+ *pp_tonga_gpio_table = pp_gpio_table;
+
+ return 0;
+}
/**
* Private Function used during initialization.
* Initialize clock voltage dependency
@@ -761,11 +787,15 @@ static int init_clock_voltage_dependency(
const PPTable_Generic_SubTable_Header *pcie_table =
(const PPTable_Generic_SubTable_Header *)(((unsigned long) powerplay_table) +
le16_to_cpu(powerplay_table->usPCIETableOffset));
+ const ATOM_Tonga_GPIO_Table *gpio_table =
+ (const ATOM_Tonga_GPIO_Table *)(((unsigned long) powerplay_table) +
+ le16_to_cpu(powerplay_table->usGPIOTableOffset));
pp_table_information->vdd_dep_on_sclk = NULL;
pp_table_information->vdd_dep_on_mclk = NULL;
pp_table_information->mm_dep_table = NULL;
pp_table_information->pcie_table = NULL;
+ pp_table_information->gpio_table = NULL;
if (powerplay_table->usMMDependencyTableOffset != 0)
result = get_mm_clock_voltage_table(hwmgr,
@@ -810,6 +840,10 @@ static int init_clock_voltage_dependency(
result = get_valid_clk(hwmgr, &pp_table_information->valid_sclk_values,
pp_table_information->vdd_dep_on_sclk);
+ if (!result && gpio_table)
+ result = get_gpio_table(hwmgr, &pp_table_information->gpio_table,
+ gpio_table);
+
return result;
}
@@ -1116,6 +1150,9 @@ static int pp_tables_v1_0_uninitialize(struct pp_hwmgr *hwmgr)
kfree(pp_table_information->pcie_table);
pp_table_information->pcie_table = NULL;
+ kfree(pp_table_information->gpio_table);
+ pp_table_information->gpio_table = NULL;
+
kfree(hwmgr->pptable);
hwmgr->pptable = NULL;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
index 36ca7c419c90..ce64dfabd34b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
@@ -837,7 +837,7 @@ static const ATOM_PPLIB_POWERPLAYTABLE *get_powerplay_table(
hwmgr->soft_pp_table = &soft_dummy_pp_table[0];
hwmgr->soft_pp_table_size = sizeof(soft_dummy_pp_table);
} else {
- table_addr = cgs_atom_get_data_table(hwmgr->device,
+ table_addr = smu_atom_get_data_table(hwmgr->adev,
GetIndexIntoMasterTable(DATA, PowerPlayInfo),
&size, &frev, &crev);
hwmgr->soft_pp_table = table_addr;
@@ -1058,7 +1058,7 @@ static int init_overdrive_limits(struct pp_hwmgr *hwmgr,
return 0;
/* We assume here that fw_info is unchanged if this call fails.*/
- fw_info = cgs_atom_get_data_table(hwmgr->device,
+ fw_info = smu_atom_get_data_table(hwmgr->adev,
GetIndexIntoMasterTable(DATA, FirmwareInfo),
&size, &frev, &crev);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 10253b89b3d8..85f84f4d8be5 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -34,7 +34,7 @@
#include "rv_ppsmc.h"
#include "smu10_hwmgr.h"
#include "power_state.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
#define SMU10_MAX_DEEPSLEEP_DIVIDER_ID 5
#define SMU10_MINIMUM_ENGINE_CLOCK 800 /* 8Mhz, the low boundary of engine clock allowed on this chip */
@@ -42,6 +42,13 @@
#define SMU10_DISPCLK_BYPASS_THRESHOLD 10000 /* 100Mhz */
#define SMC_RAM_END 0x40000
+#define mmPWR_MISC_CNTL_STATUS 0x0183
+#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L
+
static const unsigned long SMU10_Magic = (unsigned long) PHM_Rv_Magic;
@@ -74,11 +81,15 @@ static int smu10_initialize_dpm_defaults(struct pp_hwmgr *hwmgr)
smu10_data->thermal_auto_throttling_treshold = 0;
smu10_data->is_nb_dpm_enabled = 1;
smu10_data->dpm_flags = 1;
- smu10_data->gfx_off_controled_by_driver = false;
smu10_data->need_min_deep_sleep_dcefclk = true;
smu10_data->num_active_display = 0;
smu10_data->deep_sleep_dcefclk = 0;
+ if (hwmgr->feature_mask & PP_GFXOFF_MASK)
+ smu10_data->gfx_off_controled_by_driver = true;
+ else
+ smu10_data->gfx_off_controled_by_driver = false;
+
phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_SclkDeepSleep);
@@ -161,7 +172,7 @@ static int smu10_set_clock_limit(struct pp_hwmgr *hwmgr, const void *input)
struct PP_Clocks clocks = {0};
struct pp_display_clock_request clock_req;
- clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk;
+ clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk;
clock_req.clock_type = amd_pp_dcf_clock;
clock_req.clock_freq_in_khz = clocks.dcefClock * 10;
@@ -206,12 +217,18 @@ static int smu10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input
static int smu10_init_power_gate_state(struct pp_hwmgr *hwmgr)
{
struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
+ struct amdgpu_device *adev = hwmgr->adev;
smu10_data->vcn_power_gated = true;
smu10_data->isp_tileA_power_gated = true;
smu10_data->isp_tileB_power_gated = true;
- return 0;
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
+ return smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetGfxCGPG,
+ true);
+ else
+ return 0;
}
@@ -237,13 +254,31 @@ static int smu10_power_off_asic(struct pp_hwmgr *hwmgr)
return smu10_reset_cc6_data(hwmgr);
}
+static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
+
+ reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
+ if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
+ (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
+ return true;
+
+ return false;
+}
+
static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
{
struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
- if (smu10_data->gfx_off_controled_by_driver)
+ if (smu10_data->gfx_off_controled_by_driver) {
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff);
+ /* confirm gfx is back to "on" state */
+ while (!smu10_is_gfx_on(hwmgr))
+ msleep(1);
+ }
+
return 0;
}
@@ -267,6 +302,14 @@ static int smu10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
return smu10_enable_gfx_off(hwmgr);
}
+static int smu10_gfx_off_control(struct pp_hwmgr *hwmgr, bool enable)
+{
+ if (enable)
+ return smu10_enable_gfx_off(hwmgr);
+ else
+ return smu10_disable_gfx_off(hwmgr);
+}
+
static int smu10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
struct pp_power_state *prequest_ps,
const struct pp_power_state *pcurrent_ps)
@@ -340,7 +383,7 @@ static int smu10_get_clock_voltage_dependency_table(struct pp_hwmgr *hwmgr,
static int smu10_populate_clock_table(struct pp_hwmgr *hwmgr)
{
- int result;
+ uint32_t result;
struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
DpmClocks_t *table = &(smu10_data->clock_table);
@@ -386,11 +429,11 @@ static int smu10_populate_clock_table(struct pp_hwmgr *hwmgr)
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency);
result = smum_get_argument(hwmgr);
- smu10_data->gfx_min_freq_limit = result * 100;
+ smu10_data->gfx_min_freq_limit = result / 10 * 1000;
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency);
result = smum_get_argument(hwmgr);
- smu10_data->gfx_max_freq_limit = result * 100;
+ smu10_data->gfx_max_freq_limit = result / 10 * 1000;
return 0;
}
@@ -436,8 +479,8 @@ static int smu10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
hwmgr->platform_descriptor.minimumClocksReductionPercentage = 50;
- hwmgr->pstate_sclk = SMU10_UMD_PSTATE_GFXCLK;
- hwmgr->pstate_mclk = SMU10_UMD_PSTATE_FCLK;
+ hwmgr->pstate_sclk = SMU10_UMD_PSTATE_GFXCLK * 100;
+ hwmgr->pstate_mclk = SMU10_UMD_PSTATE_FCLK * 100;
return result;
}
@@ -472,6 +515,8 @@ static int smu10_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
enum amd_dpm_forced_level level)
{
+ struct smu10_hwmgr *data = hwmgr->backend;
+
if (hwmgr->smu_version < 0x1E3700) {
pr_info("smu firmware version too old, can not set dpm level\n");
return 0;
@@ -482,7 +527,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinGfxClk,
- SMU10_UMD_PSTATE_PEAK_GFXCLK);
+ data->gfx_max_freq_limit/100);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinFclkByFreq,
SMU10_UMD_PSTATE_PEAK_FCLK);
@@ -495,7 +540,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxGfxClk,
- SMU10_UMD_PSTATE_PEAK_GFXCLK);
+ data->gfx_max_freq_limit/100);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxFclkByFreq,
SMU10_UMD_PSTATE_PEAK_FCLK);
@@ -509,10 +554,10 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinGfxClk,
- SMU10_UMD_PSTATE_MIN_GFXCLK);
+ data->gfx_min_freq_limit/100);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxGfxClk,
- SMU10_UMD_PSTATE_MIN_GFXCLK);
+ data->gfx_min_freq_limit/100);
break;
case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
smum_send_msg_to_smc_with_parameter(hwmgr,
@@ -552,10 +597,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
case AMD_DPM_FORCED_LEVEL_AUTO:
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinGfxClk,
- SMU10_UMD_PSTATE_MIN_GFXCLK);
+ data->gfx_min_freq_limit/100);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinFclkByFreq,
+ hwmgr->display_config->num_display > 3 ?
+ SMU10_UMD_PSTATE_PEAK_FCLK :
SMU10_UMD_PSTATE_MIN_FCLK);
+
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinSocclkByFreq,
SMU10_UMD_PSTATE_MIN_SOCCLK);
@@ -565,7 +613,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxGfxClk,
- SMU10_UMD_PSTATE_PEAK_GFXCLK);
+ data->gfx_max_freq_limit/100);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxFclkByFreq,
SMU10_UMD_PSTATE_PEAK_FCLK);
@@ -579,10 +627,10 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
case AMD_DPM_FORCED_LEVEL_LOW:
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinGfxClk,
- SMU10_UMD_PSTATE_MIN_GFXCLK);
+ data->gfx_min_freq_limit/100);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetSoftMaxGfxClk,
- SMU10_UMD_PSTATE_MIN_GFXCLK);
+ data->gfx_min_freq_limit/100);
smum_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SetHardMinFclkByFreq,
SMU10_UMD_PSTATE_MIN_FCLK);
@@ -699,6 +747,16 @@ static int smu10_set_cpu_power_state(struct pp_hwmgr *hwmgr)
static int smu10_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time,
bool cc6_disable, bool pstate_disable, bool pstate_switch_disable)
{
+ struct smu10_hwmgr *data = (struct smu10_hwmgr *)(hwmgr->backend);
+
+ if (separation_time != data->separation_time ||
+ cc6_disable != data->cc6_disable ||
+ pstate_disable != data->pstate_disable) {
+ data->separation_time = separation_time;
+ data->cc6_disable = cc6_disable;
+ data->pstate_disable = pstate_disable;
+ data->cc6_setting_changed = true;
+ }
return 0;
}
@@ -711,6 +769,51 @@ static int smu10_get_dal_power_level(struct pp_hwmgr *hwmgr,
static int smu10_force_clock_level(struct pp_hwmgr *hwmgr,
enum pp_clock_type type, uint32_t mask)
{
+ struct smu10_hwmgr *data = hwmgr->backend;
+ struct smu10_voltage_dependency_table *mclk_table =
+ data->clock_vol_info.vdd_dep_on_fclk;
+ uint32_t low, high;
+
+ low = mask ? (ffs(mask) - 1) : 0;
+ high = mask ? (fls(mask) - 1) : 0;
+
+ switch (type) {
+ case PP_SCLK:
+ if (low > 2 || high > 2) {
+ pr_info("Currently sclk only support 3 levels on RV\n");
+ return -EINVAL;
+ }
+
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetHardMinGfxClk,
+ low == 2 ? data->gfx_max_freq_limit/100 :
+ low == 1 ? SMU10_UMD_PSTATE_GFXCLK :
+ data->gfx_min_freq_limit/100);
+
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetSoftMaxGfxClk,
+ high == 0 ? data->gfx_min_freq_limit/100 :
+ high == 1 ? SMU10_UMD_PSTATE_GFXCLK :
+ data->gfx_max_freq_limit/100);
+ break;
+
+ case PP_MCLK:
+ if (low > mclk_table->count - 1 || high > mclk_table->count - 1)
+ return -EINVAL;
+
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetHardMinFclkByFreq,
+ mclk_table->entries[low].clk/100);
+
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetSoftMaxFclkByFreq,
+ mclk_table->entries[high].clk/100);
+ break;
+
+ case PP_PCIE:
+ default:
+ break;
+ }
return 0;
}
@@ -720,21 +823,30 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
struct smu10_hwmgr *data = (struct smu10_hwmgr *)(hwmgr->backend);
struct smu10_voltage_dependency_table *mclk_table =
data->clock_vol_info.vdd_dep_on_fclk;
- int i, now, size = 0;
+ uint32_t i, now, size = 0;
switch (type) {
case PP_SCLK:
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetGfxclkFrequency);
now = smum_get_argument(hwmgr);
+ /* driver only know min/max gfx_clk, Add level 1 for all other gfx clks */
+ if (now == data->gfx_max_freq_limit/100)
+ i = 2;
+ else if (now == data->gfx_min_freq_limit/100)
+ i = 0;
+ else
+ i = 1;
+
size += sprintf(buf + size, "0: %uMhz %s\n",
- data->gfx_min_freq_limit / 100,
- ((data->gfx_min_freq_limit / 100)
- == now) ? "*" : "");
+ data->gfx_min_freq_limit/100,
+ i == 0 ? "*" : "");
size += sprintf(buf + size, "1: %uMhz %s\n",
- data->gfx_max_freq_limit / 100,
- ((data->gfx_max_freq_limit / 100)
- == now) ? "*" : "");
+ i == 1 ? now : SMU10_UMD_PSTATE_GFXCLK,
+ i == 1 ? "*" : "");
+ size += sprintf(buf + size, "2: %uMhz %s\n",
+ data->gfx_max_freq_limit/100,
+ i == 2 ? "*" : "");
break;
case PP_MCLK:
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency);
@@ -947,9 +1059,8 @@ static int smu10_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simpl
static int smu10_thermal_get_temperature(struct pp_hwmgr *hwmgr)
{
- uint32_t reg_offset = soc15_get_register_offset(THM_HWID, 0,
- mmTHM_TCON_CUR_TMP_BASE_IDX, mmTHM_TCON_CUR_TMP);
- uint32_t reg_value = cgs_read_register(hwmgr->device, reg_offset);
+ struct amdgpu_device *adev = hwmgr->adev;
+ uint32_t reg_value = RREG32_SOC15(THM, 0, mmTHM_TCON_CUR_TMP);
int cur_temp =
(reg_value & THM_TCON_CUR_TMP__CUR_TEMP_MASK) >> THM_TCON_CUR_TMP__CUR_TEMP__SHIFT;
@@ -993,11 +1104,47 @@ static int smu10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
return ret;
}
+static int smu10_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr,
+ struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges)
+{
+ struct smu10_hwmgr *data = hwmgr->backend;
+ Watermarks_t *table = &(data->water_marks_table);
+ int result = 0;
+
+ smu_set_watermarks_for_clocks_ranges(table,wm_with_clock_ranges);
+ smum_smc_table_manager(hwmgr, (uint8_t *)table, (uint16_t)SMU10_WMTABLE, false);
+ data->water_marks_exist = true;
+ return result;
+}
+
+static int smu10_smus_notify_pwe(struct pp_hwmgr *hwmgr)
+{
+
+ return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SetRccPfcPmeRestoreRegister);
+}
+
static int smu10_set_mmhub_powergating_by_smu(struct pp_hwmgr *hwmgr)
{
return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PowerGateMmHub);
}
+static void smu10_powergate_vcn(struct pp_hwmgr *hwmgr, bool bgate)
+{
+ if (bgate) {
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
+ AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_PowerDownVcn, 0);
+ } else {
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_PowerUpVcn, 0);
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
+ AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
+ }
+}
+
static const struct pp_hwmgr_func smu10_hwmgr_funcs = {
.backend_init = smu10_hwmgr_backend_init,
.backend_fini = smu10_hwmgr_backend_fini,
@@ -1006,7 +1153,7 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = {
.force_dpm_level = smu10_dpm_force_dpm_level,
.get_power_state_size = smu10_get_power_state_size,
.powerdown_uvd = NULL,
- .powergate_uvd = NULL,
+ .powergate_uvd = smu10_powergate_vcn,
.powergate_vce = NULL,
.get_mclk = smu10_dpm_get_mclk,
.get_sclk = smu10_dpm_get_sclk,
@@ -1022,6 +1169,7 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = {
.get_current_shallow_sleep_clocks = smu10_get_current_shallow_sleep_clocks,
.get_clock_by_type_with_latency = smu10_get_clock_by_type_with_latency,
.get_clock_by_type_with_voltage = smu10_get_clock_by_type_with_voltage,
+ .set_watermarks_for_clocks_ranges = smu10_set_watermarks_for_clocks_ranges,
.get_max_high_clocks = smu10_get_max_high_clocks,
.read_sensor = smu10_read_sensor,
.set_active_display_count = smu10_set_active_display_count,
@@ -1032,6 +1180,8 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = {
.power_state_set = smu10_set_power_state_tasks,
.dynamic_state_management_disable = smu10_disable_dpm_tasks,
.set_mmhub_powergating_by_smu = smu10_set_mmhub_powergating_by_smu,
+ .smus_notify_pwe = smu10_smus_notify_pwe,
+ .gfx_off_control = smu10_gfx_off_control,
};
int smu10_init_function_pointers(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h
index 175c3a592b6c..1fb296a996f3 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h
@@ -290,6 +290,7 @@ struct smu10_hwmgr {
bool vcn_dpg_mode;
bool gfx_off_controled_by_driver;
+ bool water_marks_exist;
Watermarks_t water_marks_table;
struct smu10_clock_voltage_information clock_vol_info;
DpmClocks_t clock_table;
@@ -310,11 +311,9 @@ int smu10_init_function_pointers(struct pp_hwmgr *hwmgr);
#define SMU10_UMD_PSTATE_FCLK 933
#define SMU10_UMD_PSTATE_VCE 0x03C00320
-#define SMU10_UMD_PSTATE_PEAK_GFXCLK 1100
#define SMU10_UMD_PSTATE_PEAK_SOCCLK 757
#define SMU10_UMD_PSTATE_PEAK_FCLK 1200
-#define SMU10_UMD_PSTATE_MIN_GFXCLK 200
#define SMU10_UMD_PSTATE_MIN_FCLK 400
#define SMU10_UMD_PSTATE_MIN_SOCCLK 200
#define SMU10_UMD_PSTATE_MIN_VCE 0x0190012C
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
index f4cbaee4e2ca..6d72a5600917 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
@@ -147,20 +147,20 @@ void smu7_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
data->uvd_power_gated = bgate;
if (bgate) {
- cgs_set_powergating_state(hwmgr->device,
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_UVD,
AMD_PG_STATE_GATE);
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_UVD,
AMD_CG_STATE_GATE);
smu7_update_uvd_dpm(hwmgr, true);
smu7_powerdown_uvd(hwmgr);
} else {
smu7_powerup_uvd(hwmgr);
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_UVD,
AMD_CG_STATE_UNGATE);
- cgs_set_powergating_state(hwmgr->device,
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_UVD,
AMD_PG_STATE_UNGATE);
smu7_update_uvd_dpm(hwmgr, false);
@@ -175,20 +175,20 @@ void smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
data->vce_power_gated = bgate;
if (bgate) {
- cgs_set_powergating_state(hwmgr->device,
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_GATE);
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_VCE,
AMD_CG_STATE_GATE);
smu7_update_vce_dpm(hwmgr, true);
smu7_powerdown_vce(hwmgr);
} else {
smu7_powerup_vce(hwmgr);
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_VCE,
AMD_CG_STATE_UNGATE);
- cgs_set_powergating_state(hwmgr->device,
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_UNGATE);
smu7_update_vce_dpm(hwmgr, false);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 18b5b2ff47fe..45e9b8cb169d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -61,10 +61,6 @@
#define SMC_CG_IND_START 0xc0030000
#define SMC_CG_IND_END 0xc0040000
-#define VOLTAGE_SCALE 4
-#define VOLTAGE_VID_OFFSET_SCALE1 625
-#define VOLTAGE_VID_OFFSET_SCALE2 100
-
#define MEM_FREQ_LOW_LATENCY 25000
#define MEM_FREQ_HIGH_LATENCY 80000
@@ -88,6 +84,14 @@ static const struct profile_mode_setting smu7_profiling[6] =
{0, 0, 0, 0, 0, 0, 0, 0},
};
+#define PPSMC_MSG_SetVBITimeout_VEGAM ((uint16_t) 0x310)
+
+#define ixPWR_SVI2_PLANE1_LOAD 0xC0200280
+#define PWR_SVI2_PLANE1_LOAD__PSI1_MASK 0x00000020L
+#define PWR_SVI2_PLANE1_LOAD__PSI0_EN_MASK 0x00000040L
+#define PWR_SVI2_PLANE1_LOAD__PSI1__SHIFT 0x00000005
+#define PWR_SVI2_PLANE1_LOAD__PSI0_EN__SHIFT 0x00000006
+
/** Values for the CG_THERMAL_CTRL::DPM_EVENT_SRC field. */
enum DPM_EVENT_SRC {
DPM_EVENT_SRC_ANALOG = 0,
@@ -169,6 +173,13 @@ static int smu7_get_current_pcie_lane_number(struct pp_hwmgr *hwmgr)
*/
static int smu7_enable_smc_voltage_controller(struct pp_hwmgr *hwmgr)
{
+ if (hwmgr->chip_id == CHIP_VEGAM) {
+ PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device,
+ CGS_IND_REG__SMC, PWR_SVI2_PLANE1_LOAD, PSI1, 0);
+ PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device,
+ CGS_IND_REG__SMC, PWR_SVI2_PLANE1_LOAD, PSI0_EN, 0);
+ }
+
if (hwmgr->feature_mask & PP_SMC_VOLTAGE_CONTROL_MASK)
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Voltage_Cntl_Enable);
@@ -798,32 +809,6 @@ static int smu7_setup_dpm_tables_v1(struct pp_hwmgr *hwmgr)
return 0;
}
-static int smu7_get_voltage_dependency_table(
- const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table,
- struct phm_ppt_v1_clock_voltage_dependency_table *dep_table)
-{
- uint8_t i = 0;
- PP_ASSERT_WITH_CODE((0 != allowed_dep_table->count),
- "Voltage Lookup Table empty",
- return -EINVAL);
-
- dep_table->count = allowed_dep_table->count;
- for (i=0; i<dep_table->count; i++) {
- dep_table->entries[i].clk = allowed_dep_table->entries[i].clk;
- dep_table->entries[i].vddInd = allowed_dep_table->entries[i].vddInd;
- dep_table->entries[i].vdd_offset = allowed_dep_table->entries[i].vdd_offset;
- dep_table->entries[i].vddc = allowed_dep_table->entries[i].vddc;
- dep_table->entries[i].vddgfx = allowed_dep_table->entries[i].vddgfx;
- dep_table->entries[i].vddci = allowed_dep_table->entries[i].vddci;
- dep_table->entries[i].mvdd = allowed_dep_table->entries[i].mvdd;
- dep_table->entries[i].phases = allowed_dep_table->entries[i].phases;
- dep_table->entries[i].cks_enable = allowed_dep_table->entries[i].cks_enable;
- dep_table->entries[i].cks_voffset = allowed_dep_table->entries[i].cks_voffset;
- }
-
- return 0;
-}
-
static int smu7_odn_initial_default_setting(struct pp_hwmgr *hwmgr)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -851,7 +836,7 @@ static int smu7_odn_initial_default_setting(struct pp_hwmgr *hwmgr)
entries[i].vddc = dep_sclk_table->entries[i].vddc;
}
- smu7_get_voltage_dependency_table(dep_sclk_table,
+ smu_get_voltage_dependency_table_ppt_v1(dep_sclk_table,
(struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk));
odn_table->odn_memory_clock_dpm_levels.num_of_pl =
@@ -863,12 +848,40 @@ static int smu7_odn_initial_default_setting(struct pp_hwmgr *hwmgr)
entries[i].vddc = dep_mclk_table->entries[i].vddc;
}
- smu7_get_voltage_dependency_table(dep_mclk_table,
+ smu_get_voltage_dependency_table_ppt_v1(dep_mclk_table,
(struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_mclk));
return 0;
}
+static void smu7_setup_voltage_range_from_vbios(struct pp_hwmgr *hwmgr)
+{
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table;
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ uint32_t min_vddc = 0;
+ uint32_t max_vddc = 0;
+
+ if (!table_info)
+ return;
+
+ dep_sclk_table = table_info->vdd_dep_on_sclk;
+
+ atomctrl_get_voltage_range(hwmgr, &max_vddc, &min_vddc);
+
+ if (min_vddc == 0 || min_vddc > 2000
+ || min_vddc > dep_sclk_table->entries[0].vddc)
+ min_vddc = dep_sclk_table->entries[0].vddc;
+
+ if (max_vddc == 0 || max_vddc > 2000
+ || max_vddc < dep_sclk_table->entries[dep_sclk_table->count-1].vddc)
+ max_vddc = dep_sclk_table->entries[dep_sclk_table->count-1].vddc;
+
+ data->odn_dpm_table.min_vddc = min_vddc;
+ data->odn_dpm_table.max_vddc = max_vddc;
+}
+
static int smu7_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -887,8 +900,10 @@ static int smu7_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
sizeof(struct smu7_dpm_table));
/* initialize ODN table */
- if (hwmgr->od_enabled)
+ if (hwmgr->od_enabled) {
+ smu7_setup_voltage_range_from_vbios(hwmgr);
smu7_odn_initial_default_setting(hwmgr);
+ }
return 0;
}
@@ -966,6 +981,22 @@ static int smu7_disable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
return 0;
}
+static int smu7_disable_sclk_vce_handshake(struct pp_hwmgr *hwmgr)
+{
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ uint32_t soft_register_value = 0;
+ uint32_t handshake_disables_offset = data->soft_regs_start
+ + smum_get_offsetof(hwmgr,
+ SMU_SoftRegisters, HandshakeDisables);
+
+ soft_register_value = cgs_read_ind_register(hwmgr->device,
+ CGS_IND_REG__SMC, handshake_disables_offset);
+ soft_register_value |= SMU7_VCE_SCLK_HANDSHAKE_DISABLE;
+ cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+ handshake_disables_offset, soft_register_value);
+ return 0;
+}
+
static int smu7_disable_handshake_uvd(struct pp_hwmgr *hwmgr)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -988,23 +1019,29 @@ static int smu7_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
/* enable SCLK dpm */
- if (!data->sclk_dpm_key_disabled)
+ if (!data->sclk_dpm_key_disabled) {
+ if (hwmgr->chip_id == CHIP_VEGAM)
+ smu7_disable_sclk_vce_handshake(hwmgr);
+
PP_ASSERT_WITH_CODE(
(0 == smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DPM_Enable)),
"Failed to enable SCLK DPM during DPM Start Function!",
return -EINVAL);
+ }
/* enable MCLK dpm */
if (0 == data->mclk_dpm_key_disabled) {
if (!(hwmgr->feature_mask & PP_UVD_HANDSHAKE_MASK))
smu7_disable_handshake_uvd(hwmgr);
+
PP_ASSERT_WITH_CODE(
(0 == smum_send_msg_to_smc(hwmgr,
PPSMC_MSG_MCLKDPM_Enable)),
"Failed to enable MCLK DPM during DPM Start Function!",
return -EINVAL);
- PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);
+ if (hwmgr->chip_family != CHIP_VEGAM)
+ PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);
if (hwmgr->chip_family == AMDGPU_FAMILY_CI) {
@@ -1020,8 +1057,13 @@ static int smu7_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x5);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_CPL_CNTL, 0x100005);
udelay(10);
- cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400005);
- cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400005);
+ if (hwmgr->chip_id == CHIP_VEGAM) {
+ cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400009);
+ cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400009);
+ } else {
+ cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400005);
+ cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400005);
+ }
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_CPL_CNTL, 0x500005);
}
}
@@ -1230,7 +1272,7 @@ static int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
tmp_result = smu7_construct_voltage_tables(hwmgr);
PP_ASSERT_WITH_CODE((0 == tmp_result),
- "Failed to contruct voltage tables!",
+ "Failed to construct voltage tables!",
result = tmp_result);
}
smum_initialize_mc_reg_table(hwmgr);
@@ -1262,10 +1304,12 @@ static int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE((0 == tmp_result),
"Failed to process firmware header!", result = tmp_result);
- tmp_result = smu7_initial_switch_from_arbf0_to_f1(hwmgr);
- PP_ASSERT_WITH_CODE((0 == tmp_result),
- "Failed to initialize switch from ArbF0 to F1!",
- result = tmp_result);
+ if (hwmgr->chip_id != CHIP_VEGAM) {
+ tmp_result = smu7_initial_switch_from_arbf0_to_f1(hwmgr);
+ PP_ASSERT_WITH_CODE((0 == tmp_result),
+ "Failed to initialize switch from ArbF0 to F1!",
+ result = tmp_result);
+ }
result = smu7_setup_default_dpm_tables(hwmgr);
PP_ASSERT_WITH_CODE(0 == result,
@@ -2755,6 +2799,9 @@ static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr,
case CHIP_POLARIS12:
switch_limit_us = data->is_memory_gddr5 ? 190 : 150;
break;
+ case CHIP_VEGAM:
+ switch_limit_us = 30;
+ break;
default:
switch_limit_us = data->is_memory_gddr5 ? 450 : 150;
break;
@@ -2778,8 +2825,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
struct PP_Clocks minimum_clocks = {0};
bool disable_mclk_switching;
bool disable_mclk_switching_for_frame_lock;
- struct cgs_display_info info = {0};
- struct cgs_mode_info mode_info = {0};
const struct phm_clock_and_voltage_limits *max_limits;
uint32_t i;
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -2788,7 +2833,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
int32_t count;
int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0;
- info.mode_info = &mode_info;
data->battery_state = (PP_StateUILabel_Battery ==
request_ps->classification.ui_label);
@@ -2810,10 +2854,8 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
}
}
- cgs_get_active_displays_info(hwmgr->device, &info);
-
- minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock;
- minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
+ minimum_clocks.engineClock = hwmgr->display_config->min_core_set_clock;
+ minimum_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock;
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_StablePState)) {
@@ -2844,12 +2886,12 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
- if (info.display_count == 0)
+ if (hwmgr->display_config->num_display == 0)
disable_mclk_switching = false;
else
- disable_mclk_switching = ((1 < info.display_count) ||
+ disable_mclk_switching = ((1 < hwmgr->display_config->num_display) ||
disable_mclk_switching_for_frame_lock ||
- smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us));
+ smu7_vblank_too_short(hwmgr, hwmgr->display_config->min_vblank_time));
sclk = smu7_ps->performance_levels[0].engine_clock;
mclk = smu7_ps->performance_levels[0].memory_clock;
@@ -2958,8 +3000,7 @@ static int smu7_dpm_patch_boot_state(struct pp_hwmgr *hwmgr,
/* First retrieve the Boot clocks and VDDC from the firmware info table.
* We assume here that fw_info is unchanged if this call fails.
*/
- fw_info = (ATOM_FIRMWARE_INFO_V2_2 *)cgs_atom_get_data_table(
- hwmgr->device, index,
+ fw_info = (ATOM_FIRMWARE_INFO_V2_2 *)smu_atom_get_data_table(hwmgr->adev, index,
&size, &frev, &crev);
if (!fw_info)
/* During a test, there is no firmware info table. */
@@ -3367,34 +3408,35 @@ static int smu7_get_pp_table_entry(struct pp_hwmgr *hwmgr,
return 0;
}
-static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr,
- struct pp_gpu_power *query)
+static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query)
{
- PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_PmStatusLogStart),
- "Failed to start pm status log!",
- return -1);
+ int i;
+ u32 tmp = 0;
+
+ if (!query)
+ return -EINVAL;
- /* Sampling period from 50ms to 4sec */
- msleep_interruptible(200);
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0);
+ tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+ *query = tmp;
- PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_PmStatusLogSample),
- "Failed to sample pm status log!",
- return -1);
+ if (tmp != 0)
+ return 0;
- query->vddc_power = cgs_read_ind_register(hwmgr->device,
- CGS_IND_REG__SMC,
- ixSMU_PM_STATUS_40);
- query->vddci_power = cgs_read_ind_register(hwmgr->device,
- CGS_IND_REG__SMC,
- ixSMU_PM_STATUS_49);
- query->max_gpu_power = cgs_read_ind_register(hwmgr->device,
- CGS_IND_REG__SMC,
- ixSMU_PM_STATUS_94);
- query->average_gpu_power = cgs_read_ind_register(hwmgr->device,
- CGS_IND_REG__SMC,
- ixSMU_PM_STATUS_95);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart);
+ cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+ ixSMU_PM_STATUS_94, 0);
+
+ for (i = 0; i < 10; i++) {
+ mdelay(1);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogSample);
+ tmp = cgs_read_ind_register(hwmgr->device,
+ CGS_IND_REG__SMC,
+ ixSMU_PM_STATUS_94);
+ if (tmp != 0)
+ break;
+ }
+ *query = tmp;
return 0;
}
@@ -3447,10 +3489,7 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*size = 4;
return 0;
case AMDGPU_PP_SENSOR_GPU_POWER:
- if (*size < sizeof(struct pp_gpu_power))
- return -EINVAL;
- *size = sizeof(struct pp_gpu_power);
- return smu7_get_gpu_power(hwmgr, (struct pp_gpu_power *)value);
+ return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
case AMDGPU_PP_SENSOR_VDDGFX:
if ((data->vr_config & 0xff) == 0x2)
val_vid = PHM_READ_INDIRECT_FIELD(hwmgr->device,
@@ -3481,7 +3520,6 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
[smu7_ps->performance_level_count - 1].memory_clock;
struct PP_Clocks min_clocks = {0};
uint32_t i;
- struct cgs_display_info info = {0};
for (i = 0; i < sclk_table->count; i++) {
if (sclk == sclk_table->dpm_levels[i].value)
@@ -3508,9 +3546,8 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
if (i >= mclk_table->count)
data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
- cgs_get_active_displays_info(hwmgr->device, &info);
- if (data->display_timing.num_existing_displays != info.display_count)
+ if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
data->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK;
return 0;
@@ -3813,9 +3850,14 @@ static int smu7_notify_smc_display(struct pp_hwmgr *hwmgr)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
- if (hwmgr->feature_mask & PP_VBI_TIME_SUPPORT_MASK)
- smum_send_msg_to_smc_with_parameter(hwmgr,
- (PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2);
+ if (hwmgr->feature_mask & PP_VBI_TIME_SUPPORT_MASK) {
+ if (hwmgr->chip_id == CHIP_VEGAM)
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ (PPSMC_Msg)PPSMC_MSG_SetVBITimeout_VEGAM, data->frame_time_x2);
+ else
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ (PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2);
+ }
return (smum_send_msg_to_smc(hwmgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ? 0 : -EINVAL;
}
@@ -3909,15 +3951,8 @@ smu7_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display)
static int
smu7_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr)
{
- uint32_t num_active_displays = 0;
- struct cgs_display_info info = {0};
-
- info.mode_info = NULL;
- cgs_get_active_displays_info(hwmgr->device, &info);
-
- num_active_displays = info.display_count;
-
- if (num_active_displays > 1 && hwmgr->display_config.multi_monitor_in_sync != true)
+ if (hwmgr->display_config->num_display > 1 &&
+ !hwmgr->display_config->multi_monitor_in_sync)
smu7_notify_smc_display_change(hwmgr, false);
return 0;
@@ -3932,33 +3967,24 @@ smu7_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr)
static int smu7_program_display_gap(struct pp_hwmgr *hwmgr)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
- uint32_t num_active_displays = 0;
uint32_t display_gap = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL);
uint32_t display_gap2;
uint32_t pre_vbi_time_in_us;
uint32_t frame_time_in_us;
- uint32_t ref_clock;
- uint32_t refresh_rate = 0;
- struct cgs_display_info info = {0};
- struct cgs_mode_info mode_info = {0};
+ uint32_t ref_clock, refresh_rate;
- info.mode_info = &mode_info;
- cgs_get_active_displays_info(hwmgr->device, &info);
- num_active_displays = info.display_count;
-
- display_gap = PHM_SET_FIELD(display_gap, CG_DISPLAY_GAP_CNTL, DISP_GAP, (num_active_displays > 0) ? DISPLAY_GAP_VBLANK_OR_WM : DISPLAY_GAP_IGNORE);
+ display_gap = PHM_SET_FIELD(display_gap, CG_DISPLAY_GAP_CNTL, DISP_GAP, (hwmgr->display_config->num_display > 0) ? DISPLAY_GAP_VBLANK_OR_WM : DISPLAY_GAP_IGNORE);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL, display_gap);
ref_clock = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev);
-
- refresh_rate = mode_info.refresh_rate;
+ refresh_rate = hwmgr->display_config->vrefresh;
if (0 == refresh_rate)
refresh_rate = 60;
frame_time_in_us = 1000000 / refresh_rate;
- pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us;
+ pre_vbi_time_in_us = frame_time_in_us - 200 - hwmgr->display_config->min_vblank_time;
data->frame_time_x2 = frame_time_in_us * 2 / 100;
@@ -4038,17 +4064,14 @@ smu7_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmgr)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
bool is_update_required = false;
- struct cgs_display_info info = {0, 0, NULL};
-
- cgs_get_active_displays_info(hwmgr->device, &info);
- if (data->display_timing.num_existing_displays != info.display_count)
+ if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
is_update_required = true;
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) {
- if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr &&
+ if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr &&
(data->display_timing.min_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK ||
- hwmgr->display_config.min_core_set_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK))
+ hwmgr->display_config->min_core_set_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK))
is_update_required = true;
}
return is_update_required;
@@ -4103,7 +4126,7 @@ static int smu7_check_states_equal(struct pp_hwmgr *hwmgr,
return 0;
}
-static int smu7_upload_mc_firmware(struct pp_hwmgr *hwmgr)
+static int smu7_check_mc_firmware(struct pp_hwmgr *hwmgr)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -4182,13 +4205,9 @@ static int smu7_read_clock_registers(struct pp_hwmgr *hwmgr)
static int smu7_get_memory_type(struct pp_hwmgr *hwmgr)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
- uint32_t temp;
-
- temp = cgs_read_register(hwmgr->device, mmMC_SEQ_MISC0);
+ struct amdgpu_device *adev = hwmgr->adev;
- data->is_memory_gddr5 = (MC_SEQ_MISC0_GDDR5_VALUE ==
- ((temp & MC_SEQ_MISC0_GDDR5_MASK) >>
- MC_SEQ_MISC0_GDDR5_SHIFT));
+ data->is_memory_gddr5 = (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5);
return 0;
}
@@ -4236,7 +4255,7 @@ static int smu7_setup_asic_task(struct pp_hwmgr *hwmgr)
{
int tmp_result, result = 0;
- smu7_upload_mc_firmware(hwmgr);
+ smu7_check_mc_firmware(hwmgr);
tmp_result = smu7_read_clock_registers(hwmgr);
PP_ASSERT_WITH_CODE((0 == tmp_result),
@@ -4371,22 +4390,36 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
break;
case OD_SCLK:
if (hwmgr->od_enabled) {
- size = sprintf(buf, "%s: \n", "OD_SCLK");
+ size = sprintf(buf, "%s:\n", "OD_SCLK");
for (i = 0; i < odn_sclk_table->num_of_pl; i++)
- size += sprintf(buf + size, "%d: %10uMhz %10u mV\n",
- i, odn_sclk_table->entries[i].clock / 100,
+ size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
+ i, odn_sclk_table->entries[i].clock/100,
odn_sclk_table->entries[i].vddc);
}
break;
case OD_MCLK:
if (hwmgr->od_enabled) {
- size = sprintf(buf, "%s: \n", "OD_MCLK");
+ size = sprintf(buf, "%s:\n", "OD_MCLK");
for (i = 0; i < odn_mclk_table->num_of_pl; i++)
- size += sprintf(buf + size, "%d: %10uMhz %10u mV\n",
- i, odn_mclk_table->entries[i].clock / 100,
+ size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
+ i, odn_mclk_table->entries[i].clock/100,
odn_mclk_table->entries[i].vddc);
}
break;
+ case OD_RANGE:
+ if (hwmgr->od_enabled) {
+ size = sprintf(buf, "%s:\n", "OD_RANGE");
+ size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
+ data->golden_dpm_table.sclk_table.dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
+ size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n",
+ data->golden_dpm_table.mclk_table.dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
+ size += sprintf(buf + size, "VDDC: %7umV %11umV\n",
+ data->odn_dpm_table.min_vddc,
+ data->odn_dpm_table.max_vddc);
+ }
+ break;
default:
break;
}
@@ -4670,36 +4703,27 @@ static bool smu7_check_clk_voltage_valid(struct pp_hwmgr *hwmgr,
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
- struct phm_ppt_v1_information *table_info =
- (struct phm_ppt_v1_information *)(hwmgr->pptable);
- uint32_t min_vddc;
- struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table;
-
- if (table_info == NULL)
- return false;
-
- dep_sclk_table = table_info->vdd_dep_on_sclk;
- min_vddc = dep_sclk_table->entries[0].vddc;
-
- if (voltage < min_vddc || voltage > 2000) {
- pr_info("OD voltage is out of range [%d - 2000] mV\n", min_vddc);
+ if (voltage < data->odn_dpm_table.min_vddc || voltage > data->odn_dpm_table.max_vddc) {
+ pr_info("OD voltage is out of range [%d - %d] mV\n",
+ data->odn_dpm_table.min_vddc,
+ data->odn_dpm_table.max_vddc);
return false;
}
if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
- if (data->vbios_boot_state.sclk_bootup_value > clk ||
+ if (data->golden_dpm_table.sclk_table.dpm_levels[0].value > clk ||
hwmgr->platform_descriptor.overdriveLimit.engineClock < clk) {
pr_info("OD engine clock is out of range [%d - %d] MHz\n",
- data->vbios_boot_state.sclk_bootup_value,
- hwmgr->platform_descriptor.overdriveLimit.engineClock / 100);
+ data->golden_dpm_table.sclk_table.dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
return false;
}
} else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) {
- if (data->vbios_boot_state.mclk_bootup_value > clk ||
+ if (data->golden_dpm_table.mclk_table.dpm_levels[0].value > clk ||
hwmgr->platform_descriptor.overdriveLimit.memoryClock < clk) {
pr_info("OD memory clock is out of range [%d - %d] MHz\n",
- data->vbios_boot_state.mclk_bootup_value/100,
- hwmgr->platform_descriptor.overdriveLimit.memoryClock / 100);
+ data->golden_dpm_table.mclk_table.dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
return false;
}
} else {
@@ -4748,10 +4772,6 @@ static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr)
return;
}
}
- if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
- data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
- data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
- }
dep_table = table_info->vdd_dep_on_sclk;
odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk);
@@ -4761,9 +4781,9 @@ static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr)
return;
}
}
- if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
+ if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
- data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+ data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK;
}
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
index b8d0bb378595..c91e75db6a8e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
@@ -184,6 +184,8 @@ struct smu7_odn_dpm_table {
struct smu7_odn_clock_voltage_dependency_table vdd_dependency_on_sclk;
struct smu7_odn_clock_voltage_dependency_table vdd_dependency_on_mclk;
uint32_t odn_mclk_min_limit;
+ uint32_t min_vddc;
+ uint32_t max_vddc;
};
struct profile_mode_setting {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
index d9e92e306535..c952845833d7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
@@ -623,6 +623,190 @@ static const struct gpu_pt_config_reg DIDTConfig_Polaris11_Kicker[] =
{ 0xFFFFFFFF } /* End of list */
};
+static const struct gpu_pt_config_reg GCCACConfig_VegaM[] =
+{
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Offset Mask Shift Value Type
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ // DIDT_SQ
+ //
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00060013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00860013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01060013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01860013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02060013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02860013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x03060013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x03860013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x04060013, GPU_CONFIGREG_GC_CAC_IND },
+
+ // DIDT_TD
+ //
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x000E0013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x008E0013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x010E0013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x018E0013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x020E0013, GPU_CONFIGREG_GC_CAC_IND },
+
+ // DIDT_TCP
+ //
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00100013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00900013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01100013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01900013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02100013, GPU_CONFIGREG_GC_CAC_IND },
+ { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02900013, GPU_CONFIGREG_GC_CAC_IND },
+
+ { 0xFFFFFFFF } // End of list
+};
+
+static const struct gpu_pt_config_reg DIDTConfig_VegaM[] =
+{
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Offset Mask Shift Value Type
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ // DIDT_SQ
+ //
+ { ixDIDT_SQ_WEIGHT0_3, DIDT_SQ_WEIGHT0_3__WEIGHT0_MASK, DIDT_SQ_WEIGHT0_3__WEIGHT0__SHIFT, 0x0073, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_WEIGHT0_3, DIDT_SQ_WEIGHT0_3__WEIGHT1_MASK, DIDT_SQ_WEIGHT0_3__WEIGHT1__SHIFT, 0x00ab, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_WEIGHT0_3, DIDT_SQ_WEIGHT0_3__WEIGHT2_MASK, DIDT_SQ_WEIGHT0_3__WEIGHT2__SHIFT, 0x0084, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_WEIGHT0_3, DIDT_SQ_WEIGHT0_3__WEIGHT3_MASK, DIDT_SQ_WEIGHT0_3__WEIGHT3__SHIFT, 0x005a, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_SQ_WEIGHT4_7, DIDT_SQ_WEIGHT4_7__WEIGHT4_MASK, DIDT_SQ_WEIGHT4_7__WEIGHT4__SHIFT, 0x0067, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_WEIGHT4_7, DIDT_SQ_WEIGHT4_7__WEIGHT5_MASK, DIDT_SQ_WEIGHT4_7__WEIGHT5__SHIFT, 0x0084, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_WEIGHT4_7, DIDT_SQ_WEIGHT4_7__WEIGHT6_MASK, DIDT_SQ_WEIGHT4_7__WEIGHT6__SHIFT, 0x0027, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_WEIGHT4_7, DIDT_SQ_WEIGHT4_7__WEIGHT7_MASK, DIDT_SQ_WEIGHT4_7__WEIGHT7__SHIFT, 0x0046, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_SQ_WEIGHT8_11, DIDT_SQ_WEIGHT8_11__WEIGHT8_MASK, DIDT_SQ_WEIGHT8_11__WEIGHT8__SHIFT, 0x00aa, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_WEIGHT8_11, DIDT_SQ_WEIGHT8_11__WEIGHT9_MASK, DIDT_SQ_WEIGHT8_11__WEIGHT9__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_WEIGHT8_11, DIDT_SQ_WEIGHT8_11__WEIGHT10_MASK, DIDT_SQ_WEIGHT8_11__WEIGHT10__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_WEIGHT8_11, DIDT_SQ_WEIGHT8_11__WEIGHT11_MASK, DIDT_SQ_WEIGHT8_11__WEIGHT11__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_SQ_CTRL1, DIDT_SQ_CTRL1__MIN_POWER_MASK, DIDT_SQ_CTRL1__MIN_POWER__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL1, DIDT_SQ_CTRL1__MAX_POWER_MASK, DIDT_SQ_CTRL1__MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_SQ_CTRL_OCP, DIDT_SQ_CTRL_OCP__UNUSED_0_MASK, DIDT_SQ_CTRL_OCP__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL_OCP, DIDT_SQ_CTRL_OCP__OCP_MAX_POWER_MASK, DIDT_SQ_CTRL_OCP__OCP_MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK, DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3853, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__UNUSED_0_MASK, DIDT_SQ_CTRL2__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x005a, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__UNUSED_1_MASK, DIDT_SQ_CTRL2__UNUSED_1__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__UNUSED_2_MASK, DIDT_SQ_CTRL2__UNUSED_2__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK, DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x0ebb, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__UNUSED_0_MASK, DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK, DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x3853, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x3153, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK, DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK, DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__USE_REF_CLOCK_MASK, DIDT_SQ_CTRL0__USE_REF_CLOCK__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__PHASE_OFFSET_MASK, DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK, DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__UNUSED_0_MASK, DIDT_SQ_CTRL0__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ // DIDT_TD
+ //
+ { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT0_MASK, DIDT_TD_WEIGHT0_3__WEIGHT0__SHIFT, 0x000a, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT1_MASK, DIDT_TD_WEIGHT0_3__WEIGHT1__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT2_MASK, DIDT_TD_WEIGHT0_3__WEIGHT2__SHIFT, 0x0017, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT3_MASK, DIDT_TD_WEIGHT0_3__WEIGHT3__SHIFT, 0x002f, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT4_MASK, DIDT_TD_WEIGHT4_7__WEIGHT4__SHIFT, 0x0046, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT5_MASK, DIDT_TD_WEIGHT4_7__WEIGHT5__SHIFT, 0x005d, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT6_MASK, DIDT_TD_WEIGHT4_7__WEIGHT6__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT7_MASK, DIDT_TD_WEIGHT4_7__WEIGHT7__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TD_CTRL1, DIDT_TD_CTRL1__MIN_POWER_MASK, DIDT_TD_CTRL1__MIN_POWER__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL1, DIDT_TD_CTRL1__MAX_POWER_MASK, DIDT_TD_CTRL1__MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TD_CTRL_OCP, DIDT_TD_CTRL_OCP__UNUSED_0_MASK, DIDT_TD_CTRL_OCP__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL_OCP, DIDT_TD_CTRL_OCP__OCP_MAX_POWER_MASK, DIDT_TD_CTRL_OCP__OCP_MAX_POWER__SHIFT, 0x00ff, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK, DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3fff, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__UNUSED_0_MASK, DIDT_TD_CTRL2__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x000f, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__UNUSED_1_MASK, DIDT_TD_CTRL2__UNUSED_1__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__UNUSED_2_MASK, DIDT_TD_CTRL2__UNUSED_2__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK, DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__UNUSED_0_MASK, DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK, DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x0dde, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x0dde, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__UNUSED_0_MASK, DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK, DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__USE_REF_CLOCK_MASK, DIDT_TD_CTRL0__USE_REF_CLOCK__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__PHASE_OFFSET_MASK, DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK, DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x0009, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x0009, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__UNUSED_0_MASK, DIDT_TD_CTRL0__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ // DIDT_TCP
+ //
+ { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT0_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT0__SHIFT, 0x0004, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT1_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT1__SHIFT, 0x0037, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT2_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT2__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT3_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT3__SHIFT, 0x00ff, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT4_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT4__SHIFT, 0x0054, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT5_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT5__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT6_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT6__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT7_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT7__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TCP_CTRL1, DIDT_TCP_CTRL1__MIN_POWER_MASK, DIDT_TCP_CTRL1__MIN_POWER__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL1, DIDT_TCP_CTRL1__MAX_POWER_MASK, DIDT_TCP_CTRL1__MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TCP_CTRL_OCP, DIDT_TCP_CTRL_OCP__UNUSED_0_MASK, DIDT_TCP_CTRL_OCP__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL_OCP, DIDT_TCP_CTRL_OCP__OCP_MAX_POWER_MASK, DIDT_TCP_CTRL_OCP__OCP_MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK, DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3dde, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__UNUSED_0_MASK, DIDT_TCP_CTRL2__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x0032, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__UNUSED_1_MASK, DIDT_TCP_CTRL2__UNUSED_1__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__UNUSED_2_MASK, DIDT_TCP_CTRL2__UNUSED_2__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK, DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT,0x01aa, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__UNUSED_0_MASK, DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK, DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x3dde, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x3dde, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK, DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK, DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__USE_REF_CLOCK_MASK, DIDT_TCP_CTRL0__USE_REF_CLOCK__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__PHASE_OFFSET_MASK, DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK, DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND },
+ { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__UNUSED_0_MASK, DIDT_TCP_CTRL0__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND },
+
+ { 0xFFFFFFFF } // End of list
+};
static int smu7_enable_didt(struct pp_hwmgr *hwmgr, const bool enable)
{
uint32_t en = enable ? 1 : 0;
@@ -740,8 +924,8 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr)
PP_CAP(PHM_PlatformCaps_TDRamping) ||
PP_CAP(PHM_PlatformCaps_TCPRamping)) {
- cgs_enter_safe_mode(hwmgr->device, true);
- cgs_lock_grbm_idx(hwmgr->device, true);
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
+ mutex_lock(&adev->grbm_idx_mutex);
value = 0;
value2 = cgs_read_register(hwmgr->device, mmGRBM_GFX_INDEX);
for (count = 0; count < num_se; count++) {
@@ -752,67 +936,80 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr)
if (hwmgr->chip_id == CHIP_POLARIS10) {
result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris10);
- PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+ PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris10);
- PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+ PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
} else if (hwmgr->chip_id == CHIP_POLARIS11) {
result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris11);
- PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+ PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
if (hwmgr->is_kicker)
result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11_Kicker);
else
result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11);
- PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+ PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
} else if (hwmgr->chip_id == CHIP_POLARIS12) {
result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris11);
- PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+ PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris12);
- PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+ PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
+ } else if (hwmgr->chip_id == CHIP_VEGAM) {
+ result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_VegaM);
+ PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
+ result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_VegaM);
+ PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
}
}
cgs_write_register(hwmgr->device, mmGRBM_GFX_INDEX, value2);
result = smu7_enable_didt(hwmgr, true);
- PP_ASSERT_WITH_CODE((result == 0), "EnableDiDt failed.", return result);
+ PP_ASSERT_WITH_CODE((result == 0), "EnableDiDt failed.", goto error);
if (hwmgr->chip_id == CHIP_POLARIS11) {
result = smum_send_msg_to_smc(hwmgr,
(uint16_t)(PPSMC_MSG_EnableDpmDidt));
PP_ASSERT_WITH_CODE((0 == result),
- "Failed to enable DPM DIDT.", return result);
+ "Failed to enable DPM DIDT.", goto error);
}
- cgs_lock_grbm_idx(hwmgr->device, false);
- cgs_enter_safe_mode(hwmgr->device, false);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
return 0;
+error:
+ mutex_unlock(&adev->grbm_idx_mutex);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
+ return result;
}
int smu7_disable_didt_config(struct pp_hwmgr *hwmgr)
{
int result;
+ struct amdgpu_device *adev = hwmgr->adev;
if (PP_CAP(PHM_PlatformCaps_SQRamping) ||
PP_CAP(PHM_PlatformCaps_DBRamping) ||
PP_CAP(PHM_PlatformCaps_TDRamping) ||
PP_CAP(PHM_PlatformCaps_TCPRamping)) {
- cgs_enter_safe_mode(hwmgr->device, true);
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
result = smu7_enable_didt(hwmgr, false);
PP_ASSERT_WITH_CODE((result == 0),
"Post DIDT enable clock gating failed.",
- return result);
+ goto error);
if (hwmgr->chip_id == CHIP_POLARIS11) {
result = smum_send_msg_to_smc(hwmgr,
(uint16_t)(PPSMC_MSG_DisableDpmDidt));
PP_ASSERT_WITH_CODE((0 == result),
- "Failed to disable DPM DIDT.", return result);
+ "Failed to disable DPM DIDT.", goto error);
}
- cgs_enter_safe_mode(hwmgr->device, false);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
return 0;
+error:
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
+ return result;
}
int smu7_enable_smc_cac(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
index 7b26607c646a..50690c72b2ea 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
@@ -314,8 +314,7 @@ static int smu8_get_system_info_data(struct pp_hwmgr *hwmgr)
uint8_t frev, crev;
uint16_t size;
- info = (ATOM_INTEGRATED_SYSTEM_INFO_V1_9 *) cgs_atom_get_data_table(
- hwmgr->device,
+ info = (ATOM_INTEGRATED_SYSTEM_INFO_V1_9 *)smu_atom_get_data_table(hwmgr->adev,
GetIndexIntoMasterTable(DATA, IntegratedSystemInfo),
&size, &frev, &crev);
@@ -694,7 +693,7 @@ static int smu8_update_sclk_limit(struct pp_hwmgr *hwmgr)
else
data->sclk_dpm.soft_max_clk = table->entries[table->count - 1].clk;
- clock = hwmgr->display_config.min_core_set_clock;
+ clock = hwmgr->display_config->min_core_set_clock;
if (clock == 0)
pr_debug("min_core_set_clock not set\n");
@@ -749,7 +748,7 @@ static int smu8_set_deep_sleep_sclk_threshold(struct pp_hwmgr *hwmgr)
{
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_SclkDeepSleep)) {
- uint32_t clks = hwmgr->display_config.min_core_set_clock_in_sr;
+ uint32_t clks = hwmgr->display_config->min_core_set_clock_in_sr;
if (clks == 0)
clks = SMU8_MIN_DEEP_SLEEP_SCLK;
@@ -1041,25 +1040,21 @@ static int smu8_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
struct smu8_hwmgr *data = hwmgr->backend;
struct PP_Clocks clocks = {0, 0, 0, 0};
bool force_high;
- uint32_t num_of_active_displays = 0;
- struct cgs_display_info info = {0};
smu8_ps->need_dfs_bypass = true;
data->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label);
- clocks.memoryClock = hwmgr->display_config.min_mem_set_clock != 0 ?
- hwmgr->display_config.min_mem_set_clock :
+ clocks.memoryClock = hwmgr->display_config->min_mem_set_clock != 0 ?
+ hwmgr->display_config->min_mem_set_clock :
data->sys_info.nbp_memory_clock[1];
- cgs_get_active_displays_info(hwmgr->device, &info);
- num_of_active_displays = info.display_count;
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk;
force_high = (clocks.memoryClock > data->sys_info.nbp_memory_clock[SMU8_NUM_NBPMEMORYCLOCK - 1])
- || (num_of_active_displays >= 3);
+ || (hwmgr->display_config->num_display >= 3);
smu8_ps->action = smu8_current_ps->action;
@@ -1897,20 +1892,20 @@ static void smu8_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
data->uvd_power_gated = bgate;
if (bgate) {
- cgs_set_powergating_state(hwmgr->device,
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_UVD,
AMD_PG_STATE_GATE);
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_UVD,
AMD_CG_STATE_GATE);
smu8_dpm_update_uvd_dpm(hwmgr, true);
smu8_dpm_powerdown_uvd(hwmgr);
} else {
smu8_dpm_powerup_uvd(hwmgr);
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_UVD,
AMD_CG_STATE_UNGATE);
- cgs_set_powergating_state(hwmgr->device,
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_UVD,
AMD_PG_STATE_UNGATE);
smu8_dpm_update_uvd_dpm(hwmgr, false);
@@ -1923,12 +1918,10 @@ static void smu8_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
struct smu8_hwmgr *data = hwmgr->backend;
if (bgate) {
- cgs_set_powergating_state(
- hwmgr->device,
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_GATE);
- cgs_set_clockgating_state(
- hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_VCE,
AMD_CG_STATE_GATE);
smu8_enable_disable_vce_dpm(hwmgr, false);
@@ -1937,12 +1930,10 @@ static void smu8_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
} else {
smu8_dpm_powerup_vce(hwmgr);
data->vce_power_gated = false;
- cgs_set_clockgating_state(
- hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_VCE,
AMD_CG_STATE_UNGATE);
- cgs_set_powergating_state(
- hwmgr->device,
+ amdgpu_device_ip_set_powergating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_VCE,
AMD_PG_STATE_UNGATE);
smu8_dpm_update_vce_dpm(hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
index 598122854ab5..93a3d022ba47 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
@@ -24,6 +24,7 @@
#include "pp_debug.h"
#include "ppatomctrl.h"
#include "ppsmc.h"
+#include "atom.h"
uint8_t convert_to_vid(uint16_t vddc)
{
@@ -608,3 +609,100 @@ int smu9_register_irq_handlers(struct pp_hwmgr *hwmgr)
return 0;
}
+
+void *smu_atom_get_data_table(void *dev, uint32_t table, uint16_t *size,
+ uint8_t *frev, uint8_t *crev)
+{
+ struct amdgpu_device *adev = dev;
+ uint16_t data_start;
+
+ if (amdgpu_atom_parse_data_header(
+ adev->mode_info.atom_context, table, size,
+ frev, crev, &data_start))
+ return (uint8_t *)adev->mode_info.atom_context->bios +
+ data_start;
+
+ return NULL;
+}
+
+int smu_get_voltage_dependency_table_ppt_v1(
+ const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table,
+ struct phm_ppt_v1_clock_voltage_dependency_table *dep_table)
+{
+ uint8_t i = 0;
+ PP_ASSERT_WITH_CODE((0 != allowed_dep_table->count),
+ "Voltage Lookup Table empty",
+ return -EINVAL);
+
+ dep_table->count = allowed_dep_table->count;
+ for (i=0; i<dep_table->count; i++) {
+ dep_table->entries[i].clk = allowed_dep_table->entries[i].clk;
+ dep_table->entries[i].vddInd = allowed_dep_table->entries[i].vddInd;
+ dep_table->entries[i].vdd_offset = allowed_dep_table->entries[i].vdd_offset;
+ dep_table->entries[i].vddc = allowed_dep_table->entries[i].vddc;
+ dep_table->entries[i].vddgfx = allowed_dep_table->entries[i].vddgfx;
+ dep_table->entries[i].vddci = allowed_dep_table->entries[i].vddci;
+ dep_table->entries[i].mvdd = allowed_dep_table->entries[i].mvdd;
+ dep_table->entries[i].phases = allowed_dep_table->entries[i].phases;
+ dep_table->entries[i].cks_enable = allowed_dep_table->entries[i].cks_enable;
+ dep_table->entries[i].cks_voffset = allowed_dep_table->entries[i].cks_voffset;
+ }
+
+ return 0;
+}
+
+int smu_set_watermarks_for_clocks_ranges(void *wt_table,
+ struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges)
+{
+ uint32_t i;
+ struct watermarks *table = wt_table;
+
+ if (!table || !wm_with_clock_ranges)
+ return -EINVAL;
+
+ if (wm_with_clock_ranges->num_wm_sets_dmif > 4 || wm_with_clock_ranges->num_wm_sets_mcif > 4)
+ return -EINVAL;
+
+ for (i = 0; i < wm_with_clock_ranges->num_wm_sets_dmif; i++) {
+ table->WatermarkRow[1][i].MinClock =
+ cpu_to_le16((uint16_t)
+ (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_dcefclk_in_khz) /
+ 100);
+ table->WatermarkRow[1][i].MaxClock =
+ cpu_to_le16((uint16_t)
+ (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_dcefclk_in_khz) /
+ 100);
+ table->WatermarkRow[1][i].MinUclk =
+ cpu_to_le16((uint16_t)
+ (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_memclk_in_khz) /
+ 100);
+ table->WatermarkRow[1][i].MaxUclk =
+ cpu_to_le16((uint16_t)
+ (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_memclk_in_khz) /
+ 100);
+ table->WatermarkRow[1][i].WmSetting = (uint8_t)
+ wm_with_clock_ranges->wm_sets_dmif[i].wm_set_id;
+ }
+
+ for (i = 0; i < wm_with_clock_ranges->num_wm_sets_mcif; i++) {
+ table->WatermarkRow[0][i].MinClock =
+ cpu_to_le16((uint16_t)
+ (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_socclk_in_khz) /
+ 100);
+ table->WatermarkRow[0][i].MaxClock =
+ cpu_to_le16((uint16_t)
+ (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_socclk_in_khz) /
+ 100);
+ table->WatermarkRow[0][i].MinUclk =
+ cpu_to_le16((uint16_t)
+ (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_memclk_in_khz) /
+ 100);
+ table->WatermarkRow[0][i].MaxUclk =
+ cpu_to_le16((uint16_t)
+ (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_memclk_in_khz) /
+ 100);
+ table->WatermarkRow[0][i].WmSetting = (uint8_t)
+ wm_with_clock_ranges->wm_sets_mcif[i].wm_set_id;
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h
index d37d16e4b613..916cc01e7652 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h
@@ -26,10 +26,27 @@
struct pp_atomctrl_voltage_table;
struct pp_hwmgr;
struct phm_ppt_v1_voltage_lookup_table;
+struct Watermarks_t;
+struct pp_wm_sets_with_clock_ranges_soc15;
uint8_t convert_to_vid(uint16_t vddc);
uint16_t convert_to_vddc(uint8_t vid);
+struct watermark_row_generic_t {
+ uint16_t MinClock;
+ uint16_t MaxClock;
+ uint16_t MinUclk;
+ uint16_t MaxUclk;
+
+ uint8_t WmSetting;
+ uint8_t Padding[3];
+};
+
+struct watermarks {
+ struct watermark_row_generic_t WatermarkRow[2][4];
+ uint32_t padding[7];
+};
+
extern int phm_wait_for_register_unequal(struct pp_hwmgr *hwmgr,
uint32_t index,
uint32_t value, uint32_t mask);
@@ -82,6 +99,16 @@ int phm_irq_process(struct amdgpu_device *adev,
int smu9_register_irq_handlers(struct pp_hwmgr *hwmgr);
+void *smu_atom_get_data_table(void *dev, uint32_t table, uint16_t *size,
+ uint8_t *frev, uint8_t *crev);
+
+int smu_get_voltage_dependency_table_ppt_v1(
+ const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table,
+ struct phm_ppt_v1_clock_voltage_dependency_table *dep_table);
+
+int smu_set_watermarks_for_clocks_ranges(void *wt_table,
+ struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges);
+
#define PHM_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
#define PHM_FIELD_MASK(reg, field) reg##__##field##_MASK
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 7cbb56ba6fab..d156b7bb92ae 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -36,7 +36,7 @@
#include "smu9.h"
#include "smu9_driver_if.h"
#include "vega10_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
#include "pppcielanes.h"
#include "vega10_hwmgr.h"
#include "vega10_processpptables.h"
@@ -51,10 +51,6 @@
#include "smuio/smuio_9_0_offset.h"
#include "smuio/smuio_9_0_sh_mask.h"
-#define VOLTAGE_SCALE 4
-#define VOLTAGE_VID_OFFSET_SCALE1 625
-#define VOLTAGE_VID_OFFSET_SCALE2 100
-
#define HBM_MEMORY_CHANNEL_WIDTH 128
static const uint32_t channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
@@ -79,8 +75,6 @@ static const uint32_t channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L
#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L
#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L
-static int vega10_force_clock_level(struct pp_hwmgr *hwmgr,
- enum pp_clock_type type, uint32_t mask);
static const ULONG PhwVega10_Magic = (ULONG)(PHM_VIslands_Magic);
@@ -291,6 +285,48 @@ static int vega10_set_features_platform_caps(struct pp_hwmgr *hwmgr)
return 0;
}
+static int vega10_odn_initial_default_setting(struct pp_hwmgr *hwmgr)
+{
+ struct vega10_hwmgr *data = hwmgr->backend;
+ struct phm_ppt_v2_information *table_info =
+ (struct phm_ppt_v2_information *)(hwmgr->pptable);
+ struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table);
+ struct vega10_odn_vddc_lookup_table *od_lookup_table;
+ struct phm_ppt_v1_voltage_lookup_table *vddc_lookup_table;
+ struct phm_ppt_v1_clock_voltage_dependency_table *dep_table[3];
+ struct phm_ppt_v1_clock_voltage_dependency_table *od_table[3];
+ uint32_t i;
+
+ od_lookup_table = &odn_table->vddc_lookup_table;
+ vddc_lookup_table = table_info->vddc_lookup_table;
+
+ for (i = 0; i < vddc_lookup_table->count; i++)
+ od_lookup_table->entries[i].us_vdd = vddc_lookup_table->entries[i].us_vdd;
+
+ od_lookup_table->count = vddc_lookup_table->count;
+
+ dep_table[0] = table_info->vdd_dep_on_sclk;
+ dep_table[1] = table_info->vdd_dep_on_mclk;
+ dep_table[2] = table_info->vdd_dep_on_socclk;
+ od_table[0] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_sclk;
+ od_table[1] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_mclk;
+ od_table[2] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_socclk;
+
+ for (i = 0; i < 3; i++)
+ smu_get_voltage_dependency_table_ppt_v1(dep_table[i], od_table[i]);
+
+ if (odn_table->max_vddc == 0 || odn_table->max_vddc > 2000)
+ odn_table->max_vddc = dep_table[0]->entries[dep_table[0]->count - 1].vddc;
+ if (odn_table->min_vddc == 0 || odn_table->min_vddc > 2000)
+ odn_table->min_vddc = dep_table[0]->entries[0].vddc;
+
+ i = od_table[2]->count - 1;
+ od_table[2]->entries[i].clk = hwmgr->platform_descriptor.overdriveLimit.memoryClock;
+ od_table[2]->entries[i].vddc = odn_table->max_vddc;
+
+ return 0;
+}
+
static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
{
struct vega10_hwmgr *data = hwmgr->backend;
@@ -427,7 +463,6 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
/* ACG firmware has major version 5 */
if ((hwmgr->smu_version & 0xff000000) == 0x5000000)
data->smu_features[GNLD_ACG].supported = true;
-
if (data->registry_data.didt_support)
data->smu_features[GNLD_DIDT].supported = true;
@@ -754,7 +789,6 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
uint32_t config_telemetry = 0;
struct pp_atomfwctrl_voltage_table vol_table;
struct amdgpu_device *adev = hwmgr->adev;
- uint32_t reg;
data = kzalloc(sizeof(struct vega10_hwmgr), GFP_KERNEL);
if (data == NULL)
@@ -860,10 +894,7 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
advanceFanControlParameters.usFanPWMMinLimit *
hwmgr->thermal_controller.fanInfo.ulMaxRPM / 100;
- reg = soc15_get_register_offset(DF_HWID, 0,
- mmDF_CS_AON0_DramBaseAddress0_BASE_IDX,
- mmDF_CS_AON0_DramBaseAddress0);
- data->mem_channels = (cgs_read_register(hwmgr->device, reg) &
+ data->mem_channels = (RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0) &
DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK) >>
DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
PP_ASSERT_WITH_CODE(data->mem_channels < ARRAY_SIZE(channel_number),
@@ -1370,48 +1401,6 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
memcpy(&(data->golden_dpm_table), &(data->dpm_table),
sizeof(struct vega10_dpm_table));
- if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) ||
- PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) {
- data->odn_dpm_table.odn_core_clock_dpm_levels.num_of_pl =
- data->dpm_table.gfx_table.count;
- for (i = 0; i < data->dpm_table.gfx_table.count; i++) {
- data->odn_dpm_table.odn_core_clock_dpm_levels.entries[i].clock =
- data->dpm_table.gfx_table.dpm_levels[i].value;
- data->odn_dpm_table.odn_core_clock_dpm_levels.entries[i].enabled = true;
- }
-
- data->odn_dpm_table.vdd_dependency_on_sclk.count =
- dep_gfx_table->count;
- for (i = 0; i < dep_gfx_table->count; i++) {
- data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].clk =
- dep_gfx_table->entries[i].clk;
- data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].vddInd =
- dep_gfx_table->entries[i].vddInd;
- data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].cks_enable =
- dep_gfx_table->entries[i].cks_enable;
- data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].cks_voffset =
- dep_gfx_table->entries[i].cks_voffset;
- }
-
- data->odn_dpm_table.odn_memory_clock_dpm_levels.num_of_pl =
- data->dpm_table.mem_table.count;
- for (i = 0; i < data->dpm_table.mem_table.count; i++) {
- data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[i].clock =
- data->dpm_table.mem_table.dpm_levels[i].value;
- data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[i].enabled = true;
- }
-
- data->odn_dpm_table.vdd_dependency_on_mclk.count = dep_mclk_table->count;
- for (i = 0; i < dep_mclk_table->count; i++) {
- data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].clk =
- dep_mclk_table->entries[i].clk;
- data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].vddInd =
- dep_mclk_table->entries[i].vddInd;
- data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].vddci =
- dep_mclk_table->entries[i].vddci;
- }
- }
-
return 0;
}
@@ -1514,18 +1503,18 @@ static int vega10_populate_single_gfx_level(struct pp_hwmgr *hwmgr,
{
struct phm_ppt_v2_information *table_info =
(struct phm_ppt_v2_information *)(hwmgr->pptable);
- struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_sclk =
- table_info->vdd_dep_on_sclk;
+ struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_sclk;
struct vega10_hwmgr *data = hwmgr->backend;
struct pp_atomfwctrl_clock_dividers_soc15 dividers;
uint32_t gfx_max_clock =
hwmgr->platform_descriptor.overdriveLimit.engineClock;
uint32_t i = 0;
- if (data->apply_overdrive_next_settings_mask &
- DPMTABLE_OD_UPDATE_VDDC)
+ if (hwmgr->od_enabled)
dep_on_sclk = (struct phm_ppt_v1_clock_voltage_dependency_table *)
- &(data->odn_dpm_table.vdd_dependency_on_sclk);
+ &(data->odn_dpm_table.vdd_dep_on_sclk);
+ else
+ dep_on_sclk = table_info->vdd_dep_on_sclk;
PP_ASSERT_WITH_CODE(dep_on_sclk,
"Invalid SOC_VDD-GFX_CLK Dependency Table!",
@@ -1577,23 +1566,32 @@ static int vega10_populate_single_soc_level(struct pp_hwmgr *hwmgr,
uint32_t soc_clock, uint8_t *current_soc_did,
uint8_t *current_vol_index)
{
+ struct vega10_hwmgr *data = hwmgr->backend;
struct phm_ppt_v2_information *table_info =
(struct phm_ppt_v2_information *)(hwmgr->pptable);
- struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_soc =
- table_info->vdd_dep_on_socclk;
+ struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_soc;
struct pp_atomfwctrl_clock_dividers_soc15 dividers;
uint32_t i;
- PP_ASSERT_WITH_CODE(dep_on_soc,
- "Invalid SOC_VDD-SOC_CLK Dependency Table!",
- return -EINVAL);
- for (i = 0; i < dep_on_soc->count; i++) {
- if (dep_on_soc->entries[i].clk == soc_clock)
- break;
+ if (hwmgr->od_enabled) {
+ dep_on_soc = (struct phm_ppt_v1_clock_voltage_dependency_table *)
+ &data->odn_dpm_table.vdd_dep_on_socclk;
+ for (i = 0; i < dep_on_soc->count; i++) {
+ if (dep_on_soc->entries[i].clk >= soc_clock)
+ break;
+ }
+ } else {
+ dep_on_soc = table_info->vdd_dep_on_socclk;
+ for (i = 0; i < dep_on_soc->count; i++) {
+ if (dep_on_soc->entries[i].clk == soc_clock)
+ break;
+ }
}
+
PP_ASSERT_WITH_CODE(dep_on_soc->count > i,
"Cannot find SOC_CLK in SOC_VDD-SOC_CLK Dependency Table",
return -EINVAL);
+
PP_ASSERT_WITH_CODE(!pp_atomfwctrl_get_gpu_pll_dividers_vega10(hwmgr,
COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
soc_clock, &dividers),
@@ -1602,22 +1600,6 @@ static int vega10_populate_single_soc_level(struct pp_hwmgr *hwmgr,
*current_soc_did = (uint8_t)dividers.ulDid;
*current_vol_index = (uint8_t)(dep_on_soc->entries[i].vddInd);
-
- return 0;
-}
-
-uint16_t vega10_locate_vddc_given_clock(struct pp_hwmgr *hwmgr,
- uint32_t clk,
- struct phm_ppt_v1_clock_voltage_dependency_table *dep_table)
-{
- uint16_t i;
-
- for (i = 0; i < dep_table->count; i++) {
- if (dep_table->entries[i].clk == clk)
- return dep_table->entries[i].vddc;
- }
-
- pr_info("[LocateVddcGivenClock] Cannot locate SOC Vddc for this clock!");
return 0;
}
@@ -1631,8 +1613,6 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr)
struct vega10_hwmgr *data = hwmgr->backend;
struct phm_ppt_v2_information *table_info =
(struct phm_ppt_v2_information *)(hwmgr->pptable);
- struct phm_ppt_v1_clock_voltage_dependency_table *dep_table =
- table_info->vdd_dep_on_socclk;
PPTable_t *pp_table = &(data->smc_state_table.pp_table);
struct vega10_single_dpm_table *dpm_table = &(data->dpm_table.gfx_table);
int result = 0;
@@ -1663,11 +1643,6 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr)
dpm_table = &(data->dpm_table.soc_table);
for (i = 0; i < dpm_table->count; i++) {
- pp_table->SocVid[i] =
- (uint8_t)convert_to_vid(
- vega10_locate_vddc_given_clock(hwmgr,
- dpm_table->dpm_levels[i].value,
- dep_table));
result = vega10_populate_single_soc_level(hwmgr,
dpm_table->dpm_levels[i].value,
&(pp_table->SocclkDid[i]),
@@ -1678,7 +1653,6 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr)
j = i - 1;
while (i < NUM_SOCCLK_DPM_LEVELS) {
- pp_table->SocVid[i] = pp_table->SocVid[j];
result = vega10_populate_single_soc_level(hwmgr,
dpm_table->dpm_levels[j].value,
&(pp_table->SocclkDid[i]),
@@ -1691,6 +1665,32 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr)
return result;
}
+static void vega10_populate_vddc_soc_levels(struct pp_hwmgr *hwmgr)
+{
+ struct vega10_hwmgr *data = hwmgr->backend;
+ PPTable_t *pp_table = &(data->smc_state_table.pp_table);
+ struct phm_ppt_v2_information *table_info = hwmgr->pptable;
+ struct phm_ppt_v1_voltage_lookup_table *vddc_lookup_table;
+
+ uint8_t soc_vid = 0;
+ uint32_t i, max_vddc_level;
+
+ if (hwmgr->od_enabled)
+ vddc_lookup_table = (struct phm_ppt_v1_voltage_lookup_table *)&data->odn_dpm_table.vddc_lookup_table;
+ else
+ vddc_lookup_table = table_info->vddc_lookup_table;
+
+ max_vddc_level = vddc_lookup_table->count;
+ for (i = 0; i < max_vddc_level; i++) {
+ soc_vid = (uint8_t)convert_to_vid(vddc_lookup_table->entries[i].us_vdd);
+ pp_table->SocVid[i] = soc_vid;
+ }
+ while (i < MAX_REGULAR_DPM_NUMBER) {
+ pp_table->SocVid[i] = soc_vid;
+ i++;
+ }
+}
+
/**
* @brief Populates single SMC GFXCLK structure using the provided clock.
*
@@ -1705,25 +1705,25 @@ static int vega10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
struct vega10_hwmgr *data = hwmgr->backend;
struct phm_ppt_v2_information *table_info =
(struct phm_ppt_v2_information *)(hwmgr->pptable);
- struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_mclk =
- table_info->vdd_dep_on_mclk;
+ struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_mclk;
struct pp_atomfwctrl_clock_dividers_soc15 dividers;
uint32_t mem_max_clock =
hwmgr->platform_descriptor.overdriveLimit.memoryClock;
uint32_t i = 0;
- if (data->apply_overdrive_next_settings_mask &
- DPMTABLE_OD_UPDATE_VDDC)
+ if (hwmgr->od_enabled)
dep_on_mclk = (struct phm_ppt_v1_clock_voltage_dependency_table *)
- &data->odn_dpm_table.vdd_dependency_on_mclk;
+ &data->odn_dpm_table.vdd_dep_on_mclk;
+ else
+ dep_on_mclk = table_info->vdd_dep_on_mclk;
PP_ASSERT_WITH_CODE(dep_on_mclk,
"Invalid SOC_VDD-UCLK Dependency Table!",
return -EINVAL);
- if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK)
+ if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK) {
mem_clock = mem_clock > mem_max_clock ? mem_max_clock : mem_clock;
- else {
+ } else {
for (i = 0; i < dep_on_mclk->count; i++) {
if (dep_on_mclk->entries[i].clk == mem_clock)
break;
@@ -2067,6 +2067,9 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
if (data->smu_features[GNLD_AVFS].supported) {
result = pp_atomfwctrl_get_avfs_information(hwmgr, &avfs_params);
if (!result) {
+ data->odn_dpm_table.max_vddc = avfs_params.ulMaxVddc;
+ data->odn_dpm_table.min_vddc = avfs_params.ulMinVddc;
+
pp_table->MinVoltageVid = (uint8_t)
convert_to_vid((uint16_t)(avfs_params.ulMinVddc));
pp_table->MaxVoltageVid = (uint8_t)
@@ -2345,6 +2348,22 @@ static int vega10_avfs_enable(struct pp_hwmgr *hwmgr, bool enable)
return 0;
}
+static int vega10_update_avfs(struct pp_hwmgr *hwmgr)
+{
+ struct vega10_hwmgr *data = hwmgr->backend;
+
+ if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
+ vega10_avfs_enable(hwmgr, false);
+ } else if (data->need_update_dpm_table) {
+ vega10_avfs_enable(hwmgr, false);
+ vega10_avfs_enable(hwmgr, true);
+ } else {
+ vega10_avfs_enable(hwmgr, true);
+ }
+
+ return 0;
+}
+
static int vega10_populate_and_upload_avfs_fuse_override(struct pp_hwmgr *hwmgr)
{
int result = 0;
@@ -2406,6 +2425,10 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
"Failed to setup default DPM tables!",
return result);
+ /* initialize ODN table */
+ if (hwmgr->od_enabled)
+ vega10_odn_initial_default_setting(hwmgr);
+
pp_atomfwctrl_get_voltage_table_v4(hwmgr, VOLTAGE_TYPE_VDDC,
VOLTAGE_OBJ_SVID2, &voltage_table);
pp_table->MaxVidStep = voltage_table.max_vid_step;
@@ -2452,6 +2475,8 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
"Failed to initialize Memory Level!",
return result);
+ vega10_populate_vddc_soc_levels(hwmgr);
+
result = vega10_populate_all_display_clock_levels(hwmgr);
PP_ASSERT_WITH_CODE(!result,
"Failed to initialize Display Level!",
@@ -2481,6 +2506,12 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
data->vbios_boot_state.mvddc = boot_up_values.usMvddc;
data->vbios_boot_state.gfx_clock = boot_up_values.ulGfxClk;
data->vbios_boot_state.mem_clock = boot_up_values.ulUClk;
+ pp_atomfwctrl_get_clk_information_by_clkid(hwmgr,
+ SMU9_SYSPLL0_SOCCLK_ID, &boot_up_values.ulSocClk);
+
+ pp_atomfwctrl_get_clk_information_by_clkid(hwmgr,
+ SMU9_SYSPLL0_DCEFCLK_ID, &boot_up_values.ulDCEFClk);
+
data->vbios_boot_state.soc_clock = boot_up_values.ulSocClk;
data->vbios_boot_state.dcef_clock = boot_up_values.ulDCEFClk;
if (0 != boot_up_values.usVddc) {
@@ -2829,7 +2860,7 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
tmp_result = vega10_construct_voltage_tables(hwmgr);
PP_ASSERT_WITH_CODE(!tmp_result,
- "Failed to contruct voltage tables!",
+ "Failed to construct voltage tables!",
result = tmp_result);
tmp_result = vega10_init_smc_table(hwmgr);
@@ -3028,7 +3059,6 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
bool disable_mclk_switching_for_frame_lock;
bool disable_mclk_switching_for_vr;
bool force_mclk_high;
- struct cgs_display_info info = {0};
const struct phm_clock_and_voltage_limits *max_limits;
uint32_t i;
struct vega10_hwmgr *data = hwmgr->backend;
@@ -3063,11 +3093,9 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
}
}
- cgs_get_active_displays_info(hwmgr->device, &info);
-
/* result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/
- minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock;
- minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
+ minimum_clocks.engineClock = hwmgr->display_config->min_core_set_clock;
+ minimum_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock;
if (PP_CAP(PHM_PlatformCaps_StablePState)) {
stable_pstate_sclk_dpm_percentage =
@@ -3107,10 +3135,10 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
PP_CAP(PHM_PlatformCaps_DisableMclkSwitchForVR);
force_mclk_high = PP_CAP(PHM_PlatformCaps_ForceMclkHigh);
- if (info.display_count == 0)
+ if (hwmgr->display_config->num_display == 0)
disable_mclk_switching = false;
else
- disable_mclk_switching = (info.display_count > 1) ||
+ disable_mclk_switching = (hwmgr->display_config->num_display > 1) ||
disable_mclk_switching_for_frame_lock ||
disable_mclk_switching_for_vr ||
force_mclk_high;
@@ -3171,87 +3199,11 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input)
{
- const struct phm_set_power_state_input *states =
- (const struct phm_set_power_state_input *)input;
- const struct vega10_power_state *vega10_ps =
- cast_const_phw_vega10_power_state(states->pnew_state);
struct vega10_hwmgr *data = hwmgr->backend;
- struct vega10_single_dpm_table *sclk_table =
- &(data->dpm_table.gfx_table);
- uint32_t sclk = vega10_ps->performance_levels
- [vega10_ps->performance_level_count - 1].gfx_clock;
- struct vega10_single_dpm_table *mclk_table =
- &(data->dpm_table.mem_table);
- uint32_t mclk = vega10_ps->performance_levels
- [vega10_ps->performance_level_count - 1].mem_clock;
- struct PP_Clocks min_clocks = {0};
- uint32_t i;
- struct cgs_display_info info = {0};
-
- data->need_update_dpm_table = 0;
-
- if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) ||
- PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) {
- for (i = 0; i < sclk_table->count; i++) {
- if (sclk == sclk_table->dpm_levels[i].value)
- break;
- }
-
- if (!(data->apply_overdrive_next_settings_mask &
- DPMTABLE_OD_UPDATE_SCLK) && i >= sclk_table->count) {
- /* Check SCLK in DAL's minimum clocks
- * in case DeepSleep divider update is required.
- */
- if (data->display_timing.min_clock_in_sr !=
- min_clocks.engineClockInSR &&
- (min_clocks.engineClockInSR >=
- VEGA10_MINIMUM_ENGINE_CLOCK ||
- data->display_timing.min_clock_in_sr >=
- VEGA10_MINIMUM_ENGINE_CLOCK))
- data->need_update_dpm_table |= DPMTABLE_UPDATE_SCLK;
- }
-
- cgs_get_active_displays_info(hwmgr->device, &info);
-
- if (data->display_timing.num_existing_displays !=
- info.display_count)
- data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK;
- } else {
- for (i = 0; i < sclk_table->count; i++) {
- if (sclk == sclk_table->dpm_levels[i].value)
- break;
- }
-
- if (i >= sclk_table->count)
- data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
- else {
- /* Check SCLK in DAL's minimum clocks
- * in case DeepSleep divider update is required.
- */
- if (data->display_timing.min_clock_in_sr !=
- min_clocks.engineClockInSR &&
- (min_clocks.engineClockInSR >=
- VEGA10_MINIMUM_ENGINE_CLOCK ||
- data->display_timing.min_clock_in_sr >=
- VEGA10_MINIMUM_ENGINE_CLOCK))
- data->need_update_dpm_table |= DPMTABLE_UPDATE_SCLK;
- }
- for (i = 0; i < mclk_table->count; i++) {
- if (mclk == mclk_table->dpm_levels[i].value)
- break;
- }
-
- cgs_get_active_displays_info(hwmgr->device, &info);
+ if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
+ data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK;
- if (i >= mclk_table->count)
- data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
-
- if (data->display_timing.num_existing_displays !=
- info.display_count ||
- i >= mclk_table->count)
- data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK;
- }
return 0;
}
@@ -3259,194 +3211,29 @@ static int vega10_populate_and_upload_sclk_mclk_dpm_levels(
struct pp_hwmgr *hwmgr, const void *input)
{
int result = 0;
- const struct phm_set_power_state_input *states =
- (const struct phm_set_power_state_input *)input;
- const struct vega10_power_state *vega10_ps =
- cast_const_phw_vega10_power_state(states->pnew_state);
struct vega10_hwmgr *data = hwmgr->backend;
- uint32_t sclk = vega10_ps->performance_levels
- [vega10_ps->performance_level_count - 1].gfx_clock;
- uint32_t mclk = vega10_ps->performance_levels
- [vega10_ps->performance_level_count - 1].mem_clock;
- struct vega10_dpm_table *dpm_table = &data->dpm_table;
- struct vega10_dpm_table *golden_dpm_table =
- &data->golden_dpm_table;
- uint32_t dpm_count, clock_percent;
- uint32_t i;
- if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) ||
- PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) {
-
- if (!data->need_update_dpm_table &&
- !data->apply_optimized_settings &&
- !data->apply_overdrive_next_settings_mask)
- return 0;
-
- if (data->apply_overdrive_next_settings_mask &
- DPMTABLE_OD_UPDATE_SCLK) {
- for (dpm_count = 0;
- dpm_count < dpm_table->gfx_table.count;
- dpm_count++) {
- dpm_table->gfx_table.dpm_levels[dpm_count].enabled =
- data->odn_dpm_table.odn_core_clock_dpm_levels.entries[dpm_count].enabled;
- dpm_table->gfx_table.dpm_levels[dpm_count].value =
- data->odn_dpm_table.odn_core_clock_dpm_levels.entries[dpm_count].clock;
- }
- }
-
- if (data->apply_overdrive_next_settings_mask &
- DPMTABLE_OD_UPDATE_MCLK) {
- for (dpm_count = 0;
- dpm_count < dpm_table->mem_table.count;
- dpm_count++) {
- dpm_table->mem_table.dpm_levels[dpm_count].enabled =
- data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[dpm_count].enabled;
- dpm_table->mem_table.dpm_levels[dpm_count].value =
- data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[dpm_count].clock;
- }
- }
-
- if ((data->need_update_dpm_table & DPMTABLE_UPDATE_SCLK) ||
- data->apply_optimized_settings ||
- (data->apply_overdrive_next_settings_mask &
- DPMTABLE_OD_UPDATE_SCLK)) {
- result = vega10_populate_all_graphic_levels(hwmgr);
- PP_ASSERT_WITH_CODE(!result,
- "Failed to populate SCLK during PopulateNewDPMClocksStates Function!",
- return result);
- }
-
- if ((data->need_update_dpm_table & DPMTABLE_UPDATE_MCLK) ||
- (data->apply_overdrive_next_settings_mask &
- DPMTABLE_OD_UPDATE_MCLK)){
- result = vega10_populate_all_memory_levels(hwmgr);
- PP_ASSERT_WITH_CODE(!result,
- "Failed to populate MCLK during PopulateNewDPMClocksStates Function!",
- return result);
- }
- } else {
- if (!data->need_update_dpm_table &&
- !data->apply_optimized_settings)
- return 0;
-
- if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_SCLK &&
- data->smu_features[GNLD_DPM_GFXCLK].supported) {
- dpm_table->
- gfx_table.dpm_levels[dpm_table->gfx_table.count - 1].
- value = sclk;
- if (hwmgr->od_enabled) {
- /* Need to do calculation based on the golden DPM table
- * as the Heatmap GPU Clock axis is also based on
- * the default values
- */
- PP_ASSERT_WITH_CODE(
- golden_dpm_table->gfx_table.dpm_levels
- [golden_dpm_table->gfx_table.count - 1].value,
- "Divide by 0!",
- return -1);
-
- dpm_count = dpm_table->gfx_table.count < 2 ?
- 0 : dpm_table->gfx_table.count - 2;
- for (i = dpm_count; i > 1; i--) {
- if (sclk > golden_dpm_table->gfx_table.dpm_levels
- [golden_dpm_table->gfx_table.count - 1].value) {
- clock_percent =
- ((sclk - golden_dpm_table->gfx_table.dpm_levels
- [golden_dpm_table->gfx_table.count - 1].value) *
- 100) /
- golden_dpm_table->gfx_table.dpm_levels
- [golden_dpm_table->gfx_table.count - 1].value;
-
- dpm_table->gfx_table.dpm_levels[i].value =
- golden_dpm_table->gfx_table.dpm_levels[i].value +
- (golden_dpm_table->gfx_table.dpm_levels[i].value *
- clock_percent) / 100;
- } else if (golden_dpm_table->
- gfx_table.dpm_levels[dpm_table->gfx_table.count-1].value >
- sclk) {
- clock_percent =
- ((golden_dpm_table->gfx_table.dpm_levels
- [golden_dpm_table->gfx_table.count - 1].value -
- sclk) * 100) /
- golden_dpm_table->gfx_table.dpm_levels
- [golden_dpm_table->gfx_table.count-1].value;
-
- dpm_table->gfx_table.dpm_levels[i].value =
- golden_dpm_table->gfx_table.dpm_levels[i].value -
- (golden_dpm_table->gfx_table.dpm_levels[i].value *
- clock_percent) / 100;
- } else
- dpm_table->gfx_table.dpm_levels[i].value =
- golden_dpm_table->gfx_table.dpm_levels[i].value;
- }
- }
- }
+ if (!data->need_update_dpm_table)
+ return 0;
- if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK &&
- data->smu_features[GNLD_DPM_UCLK].supported) {
- dpm_table->
- mem_table.dpm_levels[dpm_table->mem_table.count - 1].
- value = mclk;
+ if (data->need_update_dpm_table &
+ (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK + DPMTABLE_UPDATE_SOCCLK)) {
+ result = vega10_populate_all_graphic_levels(hwmgr);
+ PP_ASSERT_WITH_CODE((0 == result),
+ "Failed to populate SCLK during PopulateNewDPMClocksStates Function!",
+ return result);
+ }
- if (hwmgr->od_enabled) {
- PP_ASSERT_WITH_CODE(
- golden_dpm_table->mem_table.dpm_levels
- [golden_dpm_table->mem_table.count - 1].value,
- "Divide by 0!",
- return -1);
+ if (data->need_update_dpm_table &
+ (DPMTABLE_OD_UPDATE_MCLK + DPMTABLE_UPDATE_MCLK)) {
+ result = vega10_populate_all_memory_levels(hwmgr);
+ PP_ASSERT_WITH_CODE((0 == result),
+ "Failed to populate MCLK during PopulateNewDPMClocksStates Function!",
+ return result);
+ }
- dpm_count = dpm_table->mem_table.count < 2 ?
- 0 : dpm_table->mem_table.count - 2;
- for (i = dpm_count; i > 1; i--) {
- if (mclk > golden_dpm_table->mem_table.dpm_levels
- [golden_dpm_table->mem_table.count-1].value) {
- clock_percent = ((mclk -
- golden_dpm_table->mem_table.dpm_levels
- [golden_dpm_table->mem_table.count-1].value) *
- 100) /
- golden_dpm_table->mem_table.dpm_levels
- [golden_dpm_table->mem_table.count-1].value;
-
- dpm_table->mem_table.dpm_levels[i].value =
- golden_dpm_table->mem_table.dpm_levels[i].value +
- (golden_dpm_table->mem_table.dpm_levels[i].value *
- clock_percent) / 100;
- } else if (golden_dpm_table->mem_table.dpm_levels
- [dpm_table->mem_table.count-1].value > mclk) {
- clock_percent = ((golden_dpm_table->mem_table.dpm_levels
- [golden_dpm_table->mem_table.count-1].value - mclk) *
- 100) /
- golden_dpm_table->mem_table.dpm_levels
- [golden_dpm_table->mem_table.count-1].value;
-
- dpm_table->mem_table.dpm_levels[i].value =
- golden_dpm_table->mem_table.dpm_levels[i].value -
- (golden_dpm_table->mem_table.dpm_levels[i].value *
- clock_percent) / 100;
- } else
- dpm_table->mem_table.dpm_levels[i].value =
- golden_dpm_table->mem_table.dpm_levels[i].value;
- }
- }
- }
+ vega10_populate_vddc_soc_levels(hwmgr);
- if ((data->need_update_dpm_table &
- (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK)) ||
- data->apply_optimized_settings) {
- result = vega10_populate_all_graphic_levels(hwmgr);
- PP_ASSERT_WITH_CODE(!result,
- "Failed to populate SCLK during PopulateNewDPMClocksStates Function!",
- return result);
- }
-
- if (data->need_update_dpm_table &
- (DPMTABLE_OD_UPDATE_MCLK + DPMTABLE_UPDATE_MCLK)) {
- result = vega10_populate_all_memory_levels(hwmgr);
- PP_ASSERT_WITH_CODE(!result,
- "Failed to populate MCLK during PopulateNewDPMClocksStates Function!",
- return result);
- }
- }
return result;
}
@@ -3742,8 +3529,9 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr,
PP_ASSERT_WITH_CODE(!result,
"Failed to upload PPtable!", return result);
- data->apply_optimized_settings = false;
- data->apply_overdrive_next_settings_mask = 0;
+ vega10_update_avfs(hwmgr);
+
+ data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC;
return 0;
}
@@ -3793,16 +3581,18 @@ static uint32_t vega10_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low)
}
static int vega10_get_gpu_power(struct pp_hwmgr *hwmgr,
- struct pp_gpu_power *query)
+ uint32_t *query)
{
uint32_t value;
+ if (!query)
+ return -EINVAL;
+
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrPkgPwr);
value = smum_get_argument(hwmgr);
- /* power value is an integer */
- memset(query, 0, sizeof *query);
- query->average_gpu_power = value << 8;
+ /* SMC returning actual watts, keep consistent with legacy asics, low 8 bit as 8 fractional bits */
+ *query = value << 8;
return 0;
}
@@ -3810,22 +3600,18 @@ static int vega10_get_gpu_power(struct pp_hwmgr *hwmgr,
static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
void *value, int *size)
{
- uint32_t sclk_idx, mclk_idx, activity_percent = 0;
+ struct amdgpu_device *adev = hwmgr->adev;
+ uint32_t sclk_mhz, mclk_idx, activity_percent = 0;
struct vega10_hwmgr *data = hwmgr->backend;
struct vega10_dpm_table *dpm_table = &data->dpm_table;
int ret = 0;
- uint32_t reg, val_vid;
+ uint32_t val_vid;
switch (idx) {
case AMDGPU_PP_SENSOR_GFX_SCLK:
- smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentGfxclkIndex);
- sclk_idx = smum_get_argument(hwmgr);
- if (sclk_idx < dpm_table->gfx_table.count) {
- *((uint32_t *)value) = dpm_table->gfx_table.dpm_levels[sclk_idx].value;
- *size = 4;
- } else {
- ret = -EINVAL;
- }
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetAverageGfxclkActualFrequency);
+ sclk_mhz = smum_get_argument(hwmgr);
+ *((uint32_t *)value) = sclk_mhz * 100;
break;
case AMDGPU_PP_SENSOR_GFX_MCLK:
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex);
@@ -3856,18 +3642,10 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*size = 4;
break;
case AMDGPU_PP_SENSOR_GPU_POWER:
- if (*size < sizeof(struct pp_gpu_power))
- ret = -EINVAL;
- else {
- *size = sizeof(struct pp_gpu_power);
- ret = vega10_get_gpu_power(hwmgr, (struct pp_gpu_power *)value);
- }
+ ret = vega10_get_gpu_power(hwmgr, (uint32_t *)value);
break;
case AMDGPU_PP_SENSOR_VDDGFX:
- reg = soc15_get_register_offset(SMUIO_HWID, 0,
- mmSMUSVI0_PLANE0_CURRENTVID_BASE_IDX,
- mmSMUSVI0_PLANE0_CURRENTVID);
- val_vid = (cgs_read_register(hwmgr->device, reg) &
+ val_vid = (RREG32_SOC15(SMUIO, 0, mmSMUSVI0_PLANE0_CURRENTVID) &
SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID_MASK) >>
SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT;
*((uint32_t *)value) = (uint32_t)convert_to_vddc((uint8_t)val_vid);
@@ -3956,26 +3734,18 @@ static int vega10_notify_smc_display_config_after_ps_adjustment(
(struct phm_ppt_v2_information *)hwmgr->pptable;
struct phm_ppt_v1_clock_voltage_dependency_table *mclk_table = table_info->vdd_dep_on_mclk;
uint32_t idx;
- uint32_t num_active_disps = 0;
- struct cgs_display_info info = {0};
struct PP_Clocks min_clocks = {0};
uint32_t i;
struct pp_display_clock_request clock_req;
- info.mode_info = NULL;
-
- cgs_get_active_displays_info(hwmgr->device, &info);
-
- num_active_disps = info.display_count;
-
- if (num_active_disps > 1)
+ if (hwmgr->display_config->num_display > 1)
vega10_notify_smc_display_change(hwmgr, false);
else
vega10_notify_smc_display_change(hwmgr, true);
- min_clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk;
- min_clocks.dcefClockInSR = hwmgr->display_config.min_dcef_deep_sleep_set_clk;
- min_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
+ min_clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk;
+ min_clocks.dcefClockInSR = hwmgr->display_config->min_dcef_deep_sleep_set_clk;
+ min_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock;
for (i = 0; i < dpm_table->count; i++) {
if (dpm_table->dpm_levels[i].value == min_clocks.dcefClock)
@@ -4120,6 +3890,47 @@ static void vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode)
}
}
+static int vega10_force_clock_level(struct pp_hwmgr *hwmgr,
+ enum pp_clock_type type, uint32_t mask)
+{
+ struct vega10_hwmgr *data = hwmgr->backend;
+
+ switch (type) {
+ case PP_SCLK:
+ data->smc_state_table.gfx_boot_level = mask ? (ffs(mask) - 1) : 0;
+ data->smc_state_table.gfx_max_level = mask ? (fls(mask) - 1) : 0;
+
+ PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
+ "Failed to upload boot level to lowest!",
+ return -EINVAL);
+
+ PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
+ "Failed to upload dpm max level to highest!",
+ return -EINVAL);
+ break;
+
+ case PP_MCLK:
+ data->smc_state_table.mem_boot_level = mask ? (ffs(mask) - 1) : 0;
+ data->smc_state_table.mem_max_level = mask ? (fls(mask) - 1) : 0;
+
+ PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
+ "Failed to upload boot level to lowest!",
+ return -EINVAL);
+
+ PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
+ "Failed to upload dpm max level to highest!",
+ return -EINVAL);
+
+ break;
+
+ case PP_PCIE:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
enum amd_dpm_forced_level level)
{
@@ -4356,97 +4167,15 @@ static int vega10_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr,
struct vega10_hwmgr *data = hwmgr->backend;
Watermarks_t *table = &(data->smc_state_table.water_marks_table);
int result = 0;
- uint32_t i;
if (!data->registry_data.disable_water_mark) {
- for (i = 0; i < wm_with_clock_ranges->num_wm_sets_dmif; i++) {
- table->WatermarkRow[WM_DCEFCLK][i].MinClock =
- cpu_to_le16((uint16_t)
- (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_dcefclk_in_khz) /
- 100);
- table->WatermarkRow[WM_DCEFCLK][i].MaxClock =
- cpu_to_le16((uint16_t)
- (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_dcefclk_in_khz) /
- 100);
- table->WatermarkRow[WM_DCEFCLK][i].MinUclk =
- cpu_to_le16((uint16_t)
- (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_memclk_in_khz) /
- 100);
- table->WatermarkRow[WM_DCEFCLK][i].MaxUclk =
- cpu_to_le16((uint16_t)
- (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_memclk_in_khz) /
- 100);
- table->WatermarkRow[WM_DCEFCLK][i].WmSetting = (uint8_t)
- wm_with_clock_ranges->wm_sets_dmif[i].wm_set_id;
- }
-
- for (i = 0; i < wm_with_clock_ranges->num_wm_sets_mcif; i++) {
- table->WatermarkRow[WM_SOCCLK][i].MinClock =
- cpu_to_le16((uint16_t)
- (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_socclk_in_khz) /
- 100);
- table->WatermarkRow[WM_SOCCLK][i].MaxClock =
- cpu_to_le16((uint16_t)
- (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_socclk_in_khz) /
- 100);
- table->WatermarkRow[WM_SOCCLK][i].MinUclk =
- cpu_to_le16((uint16_t)
- (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_memclk_in_khz) /
- 100);
- table->WatermarkRow[WM_SOCCLK][i].MaxUclk =
- cpu_to_le16((uint16_t)
- (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_memclk_in_khz) /
- 100);
- table->WatermarkRow[WM_SOCCLK][i].WmSetting = (uint8_t)
- wm_with_clock_ranges->wm_sets_mcif[i].wm_set_id;
- }
+ smu_set_watermarks_for_clocks_ranges(table, wm_with_clock_ranges);
data->water_marks_bitmap = WaterMarksExist;
}
return result;
}
-static int vega10_force_clock_level(struct pp_hwmgr *hwmgr,
- enum pp_clock_type type, uint32_t mask)
-{
- struct vega10_hwmgr *data = hwmgr->backend;
-
- switch (type) {
- case PP_SCLK:
- data->smc_state_table.gfx_boot_level = mask ? (ffs(mask) - 1) : 0;
- data->smc_state_table.gfx_max_level = mask ? (fls(mask) - 1) : 0;
-
- PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
- "Failed to upload boot level to lowest!",
- return -EINVAL);
-
- PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
- "Failed to upload dpm max level to highest!",
- return -EINVAL);
- break;
-
- case PP_MCLK:
- data->smc_state_table.mem_boot_level = mask ? (ffs(mask) - 1) : 0;
- data->smc_state_table.mem_max_level = mask ? (fls(mask) - 1) : 0;
-
- PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
- "Failed to upload boot level to lowest!",
- return -EINVAL);
-
- PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
- "Failed to upload dpm max level to highest!",
- return -EINVAL);
-
- break;
-
- case PP_PCIE:
- default:
- break;
- }
-
- return 0;
-}
-
static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
enum pp_clock_type type, char *buf)
{
@@ -4454,6 +4183,8 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table);
struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table);
struct vega10_pcie_table *pcie_table = &(data->dpm_table.pcie_table);
+ struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep = NULL;
+
int i, now, size = 0;
switch (type) {
@@ -4492,6 +4223,40 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
(pcie_table->pcie_gen[i] == 2) ? "8.0GT/s, x16" : "",
(i == now) ? "*" : "");
break;
+ case OD_SCLK:
+ if (hwmgr->od_enabled) {
+ size = sprintf(buf, "%s:\n", "OD_SCLK");
+ podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk;
+ for (i = 0; i < podn_vdd_dep->count; i++)
+ size += sprintf(buf + size, "%d: %10uMhz %10umV\n",
+ i, podn_vdd_dep->entries[i].clk / 100,
+ podn_vdd_dep->entries[i].vddc);
+ }
+ break;
+ case OD_MCLK:
+ if (hwmgr->od_enabled) {
+ size = sprintf(buf, "%s:\n", "OD_MCLK");
+ podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk;
+ for (i = 0; i < podn_vdd_dep->count; i++)
+ size += sprintf(buf + size, "%d: %10uMhz %10umV\n",
+ i, podn_vdd_dep->entries[i].clk/100,
+ podn_vdd_dep->entries[i].vddc);
+ }
+ break;
+ case OD_RANGE:
+ if (hwmgr->od_enabled) {
+ size = sprintf(buf, "%s:\n", "OD_RANGE");
+ size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
+ data->golden_dpm_table.gfx_table.dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
+ size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n",
+ data->golden_dpm_table.mem_table.dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
+ size += sprintf(buf + size, "VDDC: %7umV %11umV\n",
+ data->odn_dpm_table.min_vddc,
+ data->odn_dpm_table.max_vddc);
+ }
+ break;
default:
break;
}
@@ -4501,10 +4266,8 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
static int vega10_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
{
struct vega10_hwmgr *data = hwmgr->backend;
- int result = 0;
- uint32_t num_turned_on_displays = 1;
Watermarks_t *wm_table = &(data->smc_state_table.water_marks_table);
- struct cgs_display_info info = {0};
+ int result = 0;
if ((data->water_marks_bitmap & WaterMarksExist) &&
!(data->water_marks_bitmap & WaterMarksLoaded)) {
@@ -4514,10 +4277,8 @@ static int vega10_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
}
if (data->water_marks_bitmap & WaterMarksLoaded) {
- cgs_get_active_displays_info(hwmgr->device, &info);
- num_turned_on_displays = info.display_count;
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_NumOfDisplays, num_turned_on_displays);
+ PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display);
}
return result;
@@ -4603,15 +4364,12 @@ vega10_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmg
{
struct vega10_hwmgr *data = hwmgr->backend;
bool is_update_required = false;
- struct cgs_display_info info = {0, 0, NULL};
- cgs_get_active_displays_info(hwmgr->device, &info);
-
- if (data->display_timing.num_existing_displays != info.display_count)
+ if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
is_update_required = true;
if (PP_CAP(PHM_PlatformCaps_SclkDeepSleep)) {
- if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr)
+ if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr)
is_update_required = true;
}
@@ -4886,6 +4644,200 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui
return 0;
}
+
+static bool vega10_check_clk_voltage_valid(struct pp_hwmgr *hwmgr,
+ enum PP_OD_DPM_TABLE_COMMAND type,
+ uint32_t clk,
+ uint32_t voltage)
+{
+ struct vega10_hwmgr *data = hwmgr->backend;
+ struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table);
+ struct vega10_single_dpm_table *golden_table;
+
+ if (voltage < odn_table->min_vddc || voltage > odn_table->max_vddc) {
+ pr_info("OD voltage is out of range [%d - %d] mV\n", odn_table->min_vddc, odn_table->max_vddc);
+ return false;
+ }
+
+ if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
+ golden_table = &(data->golden_dpm_table.gfx_table);
+ if (golden_table->dpm_levels[0].value > clk ||
+ hwmgr->platform_descriptor.overdriveLimit.engineClock < clk) {
+ pr_info("OD engine clock is out of range [%d - %d] MHz\n",
+ golden_table->dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
+ return false;
+ }
+ } else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) {
+ golden_table = &(data->golden_dpm_table.mem_table);
+ if (golden_table->dpm_levels[0].value > clk ||
+ hwmgr->platform_descriptor.overdriveLimit.memoryClock < clk) {
+ pr_info("OD memory clock is out of range [%d - %d] MHz\n",
+ golden_table->dpm_levels[0].value/100,
+ hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ return true;
+}
+
+static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr)
+{
+ struct vega10_hwmgr *data = hwmgr->backend;
+ struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table);
+ struct phm_ppt_v2_information *table_info = hwmgr->pptable;
+ struct phm_ppt_v1_clock_voltage_dependency_table *dep_table;
+ struct phm_ppt_v1_clock_voltage_dependency_table *odn_dep_table;
+ uint32_t i;
+
+ dep_table = table_info->vdd_dep_on_mclk;
+ odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_mclk);
+
+ for (i = 0; i < dep_table->count; i++) {
+ if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) {
+ data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_MCLK;
+ return;
+ }
+ }
+
+ dep_table = table_info->vdd_dep_on_sclk;
+ odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_sclk);
+ for (i = 0; i < dep_table->count; i++) {
+ if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) {
+ data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_SCLK;
+ return;
+ }
+ }
+
+ if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
+ data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
+ data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK;
+ }
+}
+
+static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
+ enum PP_OD_DPM_TABLE_COMMAND type)
+{
+ struct vega10_hwmgr *data = hwmgr->backend;
+ struct phm_ppt_v2_information *table_info = hwmgr->pptable;
+ struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = table_info->vdd_dep_on_socclk;
+ struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.soc_table;
+
+ struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_on_socclk =
+ &data->odn_dpm_table.vdd_dep_on_socclk;
+ struct vega10_odn_vddc_lookup_table *od_vddc_lookup_table = &data->odn_dpm_table.vddc_lookup_table;
+
+ struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep;
+ uint8_t i, j;
+
+ if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
+ podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk;
+ for (i = 0; i < podn_vdd_dep->count - 1; i++)
+ od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc;
+ if (od_vddc_lookup_table->entries[i].us_vdd < podn_vdd_dep->entries[i].vddc)
+ od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc;
+ } else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) {
+ podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk;
+ for (i = 0; i < dpm_table->count; i++) {
+ for (j = 0; j < od_vddc_lookup_table->count; j++) {
+ if (od_vddc_lookup_table->entries[j].us_vdd >
+ podn_vdd_dep->entries[i].vddc)
+ break;
+ }
+ if (j == od_vddc_lookup_table->count) {
+ od_vddc_lookup_table->entries[j-1].us_vdd =
+ podn_vdd_dep->entries[i].vddc;
+ data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
+ }
+ podn_vdd_dep->entries[i].vddInd = j;
+ }
+ dpm_table = &data->dpm_table.soc_table;
+ for (i = 0; i < dep_table->count; i++) {
+ if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[dep_table->count-1].vddInd &&
+ dep_table->entries[i].clk < podn_vdd_dep->entries[dep_table->count-1].clk) {
+ data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
+ podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[dep_table->count-1].clk;
+ dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk;
+ }
+ }
+ if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk <
+ podn_vdd_dep->entries[dep_table->count-1].clk) {
+ data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
+ podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = podn_vdd_dep->entries[dep_table->count-1].clk;
+ dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = podn_vdd_dep->entries[dep_table->count-1].clk;
+ }
+ if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd <
+ podn_vdd_dep->entries[dep_table->count-1].vddInd) {
+ data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
+ podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = podn_vdd_dep->entries[dep_table->count-1].vddInd;
+ }
+ }
+}
+
+static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
+ enum PP_OD_DPM_TABLE_COMMAND type,
+ long *input, uint32_t size)
+{
+ struct vega10_hwmgr *data = hwmgr->backend;
+ struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_table;
+ struct vega10_single_dpm_table *dpm_table;
+
+ uint32_t input_clk;
+ uint32_t input_vol;
+ uint32_t input_level;
+ uint32_t i;
+
+ PP_ASSERT_WITH_CODE(input, "NULL user input for clock and voltage",
+ return -EINVAL);
+
+ if (!hwmgr->od_enabled) {
+ pr_info("OverDrive feature not enabled\n");
+ return -EINVAL;
+ }
+
+ if (PP_OD_EDIT_SCLK_VDDC_TABLE == type) {
+ dpm_table = &data->dpm_table.gfx_table;
+ podn_vdd_dep_table = &data->odn_dpm_table.vdd_dep_on_sclk;
+ data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+ } else if (PP_OD_EDIT_MCLK_VDDC_TABLE == type) {
+ dpm_table = &data->dpm_table.mem_table;
+ podn_vdd_dep_table = &data->odn_dpm_table.vdd_dep_on_mclk;
+ data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+ } else if (PP_OD_RESTORE_DEFAULT_TABLE == type) {
+ memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table));
+ vega10_odn_initial_default_setting(hwmgr);
+ return 0;
+ } else if (PP_OD_COMMIT_DPM_TABLE == type) {
+ vega10_check_dpm_table_updated(hwmgr);
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+
+ for (i = 0; i < size; i += 3) {
+ if (i + 3 > size || input[i] >= podn_vdd_dep_table->count) {
+ pr_info("invalid clock voltage input\n");
+ return 0;
+ }
+ input_level = input[i];
+ input_clk = input[i+1] * 100;
+ input_vol = input[i+2];
+
+ if (vega10_check_clk_voltage_valid(hwmgr, type, input_clk, input_vol)) {
+ dpm_table->dpm_levels[input_level].value = input_clk;
+ podn_vdd_dep_table->entries[input_level].clk = input_clk;
+ podn_vdd_dep_table->entries[input_level].vddc = input_vol;
+ } else {
+ return -EINVAL;
+ }
+ }
+ vega10_odn_update_soc_table(hwmgr, type);
+ return 0;
+}
+
static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
.backend_init = vega10_hwmgr_backend_init,
.backend_fini = vega10_hwmgr_backend_fini,
@@ -4944,6 +4896,7 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
.get_power_profile_mode = vega10_get_power_profile_mode,
.set_power_profile_mode = vega10_set_power_profile_mode,
.set_power_limit = vega10_set_power_limit,
+ .odn_edit_dpm_table = vega10_odn_edit_dpm_table,
};
int vega10_enable_smc_features(struct pp_hwmgr *hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
index 5339ea1f3dce..aadd6cbc7e85 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
@@ -282,15 +282,21 @@ struct vega10_registry_data {
struct vega10_odn_clock_voltage_dependency_table {
uint32_t count;
- struct phm_ppt_v1_clock_voltage_dependency_record
- entries[MAX_REGULAR_DPM_NUMBER];
+ struct phm_ppt_v1_clock_voltage_dependency_record entries[MAX_REGULAR_DPM_NUMBER];
+};
+
+struct vega10_odn_vddc_lookup_table {
+ uint32_t count;
+ struct phm_ppt_v1_voltage_lookup_record entries[MAX_REGULAR_DPM_NUMBER];
};
struct vega10_odn_dpm_table {
- struct phm_odn_clock_levels odn_core_clock_dpm_levels;
- struct phm_odn_clock_levels odn_memory_clock_dpm_levels;
- struct vega10_odn_clock_voltage_dependency_table vdd_dependency_on_sclk;
- struct vega10_odn_clock_voltage_dependency_table vdd_dependency_on_mclk;
+ struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_sclk;
+ struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_mclk;
+ struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_socclk;
+ struct vega10_odn_vddc_lookup_table vddc_lookup_table;
+ uint32_t max_vddc;
+ uint32_t min_vddc;
};
struct vega10_odn_fan_table {
@@ -301,8 +307,8 @@ struct vega10_odn_fan_table {
};
struct vega10_hwmgr {
- struct vega10_dpm_table dpm_table;
- struct vega10_dpm_table golden_dpm_table;
+ struct vega10_dpm_table dpm_table;
+ struct vega10_dpm_table golden_dpm_table;
struct vega10_registry_data registry_data;
struct vega10_vbios_boot_state vbios_boot_state;
struct vega10_mclk_latency_table mclk_latency_table;
@@ -368,12 +374,8 @@ struct vega10_hwmgr {
bool need_long_memory_training;
/* Internal settings to apply the application power optimization parameters */
- bool apply_optimized_settings;
uint32_t disable_dpm_mask;
- /* ---- Overdrive next setting ---- */
- uint32_t apply_overdrive_next_settings_mask;
-
/* ---- SMU9 ---- */
struct smu_features smu_features[GNLD_FEATURES_MAX];
struct vega10_smc_state_table smc_state_table;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
index ba63faefc61f..a9efd8554fbc 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
@@ -27,7 +27,7 @@
#include "vega10_ppsmc.h"
#include "vega10_inc.h"
#include "pp_debug.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
static const struct vega10_didt_config_reg SEDiDtTuningCtrlConfig_Vega10[] =
{
@@ -888,36 +888,36 @@ static void vega10_didt_set_mask(struct pp_hwmgr *hwmgr, const bool enable)
if (PP_CAP(PHM_PlatformCaps_DiDtEDCEnable)) {
if (PP_CAP(PHM_PlatformCaps_SQRamping)) {
data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL);
- data = CGS_REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_EN, en);
- data = CGS_REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_SW_RST, ~en);
+ data = REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_EN, en);
+ data = REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_SW_RST, ~en);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL, data);
}
if (PP_CAP(PHM_PlatformCaps_DBRamping)) {
data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL);
- data = CGS_REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_EN, en);
- data = CGS_REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_SW_RST, ~en);
+ data = REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_EN, en);
+ data = REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_SW_RST, ~en);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL, data);
}
if (PP_CAP(PHM_PlatformCaps_TDRamping)) {
data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL);
- data = CGS_REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_EN, en);
- data = CGS_REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_SW_RST, ~en);
+ data = REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_EN, en);
+ data = REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_SW_RST, ~en);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL, data);
}
if (PP_CAP(PHM_PlatformCaps_TCPRamping)) {
data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL);
- data = CGS_REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_EN, en);
- data = CGS_REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_SW_RST, ~en);
+ data = REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_EN, en);
+ data = REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_SW_RST, ~en);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL, data);
}
if (PP_CAP(PHM_PlatformCaps_DBRRamping)) {
data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL);
- data = CGS_REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_EN, en);
- data = CGS_REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_SW_RST, ~en);
+ data = REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_EN, en);
+ data = REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_SW_RST, ~en);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL, data);
}
}
@@ -930,20 +930,18 @@ static void vega10_didt_set_mask(struct pp_hwmgr *hwmgr, const bool enable)
static int vega10_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
int result;
uint32_t num_se = 0, count, data;
- struct amdgpu_device *adev = hwmgr->adev;
- uint32_t reg;
num_se = adev->gfx.config.max_shader_engines;
- cgs_enter_safe_mode(hwmgr->device, true);
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
- cgs_lock_grbm_idx(hwmgr->device, true);
- reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
+ mutex_lock(&adev->grbm_idx_mutex);
for (count = 0; count < num_se; count++) {
data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
- cgs_write_register(hwmgr->device, reg, data);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
result = vega10_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega10, VEGA10_CONFIGREG_DIDT);
result |= vega10_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega10, VEGA10_CONFIGREG_DIDT);
@@ -958,43 +956,43 @@ static int vega10_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr)
if (0 != result)
break;
}
- cgs_write_register(hwmgr->device, reg, 0xE0000000);
- cgs_lock_grbm_idx(hwmgr->device, false);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
vega10_didt_set_mask(hwmgr, true);
- cgs_enter_safe_mode(hwmgr->device, false);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
return 0;
}
static int vega10_disable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr)
{
- cgs_enter_safe_mode(hwmgr->device, true);
+ struct amdgpu_device *adev = hwmgr->adev;
+
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
vega10_didt_set_mask(hwmgr, false);
- cgs_enter_safe_mode(hwmgr->device, false);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
return 0;
}
static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
int result;
uint32_t num_se = 0, count, data;
- struct amdgpu_device *adev = hwmgr->adev;
- uint32_t reg;
num_se = adev->gfx.config.max_shader_engines;
- cgs_enter_safe_mode(hwmgr->device, true);
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
- cgs_lock_grbm_idx(hwmgr->device, true);
- reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
+ mutex_lock(&adev->grbm_idx_mutex);
for (count = 0; count < num_se; count++) {
data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
- cgs_write_register(hwmgr->device, reg, data);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
result = vega10_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega10, VEGA10_CONFIGREG_DIDT);
result |= vega10_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega10, VEGA10_CONFIGREG_DIDT);
@@ -1003,12 +1001,12 @@ static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr)
if (0 != result)
break;
}
- cgs_write_register(hwmgr->device, reg, 0xE0000000);
- cgs_lock_grbm_idx(hwmgr->device, false);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
vega10_didt_set_mask(hwmgr, true);
- cgs_enter_safe_mode(hwmgr->device, false);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
vega10_program_gc_didt_config_registers(hwmgr, GCDiDtDroopCtrlConfig_vega10);
if (PP_CAP(PHM_PlatformCaps_GCEDC))
@@ -1022,13 +1020,14 @@ static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr)
static int vega10_disable_psm_gc_didt_config(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
uint32_t data;
- cgs_enter_safe_mode(hwmgr->device, true);
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
vega10_didt_set_mask(hwmgr, false);
- cgs_enter_safe_mode(hwmgr->device, false);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
if (PP_CAP(PHM_PlatformCaps_GCEDC)) {
data = 0x00000000;
@@ -1043,20 +1042,18 @@ static int vega10_disable_psm_gc_didt_config(struct pp_hwmgr *hwmgr)
static int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
int result;
uint32_t num_se = 0, count, data;
- struct amdgpu_device *adev = hwmgr->adev;
- uint32_t reg;
num_se = adev->gfx.config.max_shader_engines;
- cgs_enter_safe_mode(hwmgr->device, true);
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
- cgs_lock_grbm_idx(hwmgr->device, true);
- reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
+ mutex_lock(&adev->grbm_idx_mutex);
for (count = 0; count < num_se; count++) {
data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
- cgs_write_register(hwmgr->device, reg, data);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
result = vega10_program_didt_config_registers(hwmgr, SEDiDtWeightConfig_Vega10, VEGA10_CONFIGREG_DIDT);
result |= vega10_program_didt_config_registers(hwmgr, SEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT);
result |= vega10_program_didt_config_registers(hwmgr, SEEDCStallDelayConfig_Vega10, VEGA10_CONFIGREG_DIDT);
@@ -1067,46 +1064,46 @@ static int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr)
if (0 != result)
break;
}
- cgs_write_register(hwmgr->device, reg, 0xE0000000);
- cgs_lock_grbm_idx(hwmgr->device, false);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
vega10_didt_set_mask(hwmgr, true);
- cgs_enter_safe_mode(hwmgr->device, false);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
return 0;
}
static int vega10_disable_se_edc_config(struct pp_hwmgr *hwmgr)
{
- cgs_enter_safe_mode(hwmgr->device, true);
+ struct amdgpu_device *adev = hwmgr->adev;
+
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
vega10_didt_set_mask(hwmgr, false);
- cgs_enter_safe_mode(hwmgr->device, false);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
return 0;
}
static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
int result;
uint32_t num_se = 0;
uint32_t count, data;
- struct amdgpu_device *adev = hwmgr->adev;
- uint32_t reg;
num_se = adev->gfx.config.max_shader_engines;
- cgs_enter_safe_mode(hwmgr->device, true);
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
vega10_program_gc_didt_config_registers(hwmgr, AvfsPSMResetConfig_vega10);
- cgs_lock_grbm_idx(hwmgr->device, true);
- reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
+ mutex_lock(&adev->grbm_idx_mutex);
for (count = 0; count < num_se; count++) {
data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
- cgs_write_register(hwmgr->device, reg, data);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT);
result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallDelayConfig_Vega10, VEGA10_CONFIGREG_DIDT);
result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCCtrlResetConfig_Vega10, VEGA10_CONFIGREG_DIDT);
@@ -1115,12 +1112,12 @@ static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr)
if (0 != result)
break;
}
- cgs_write_register(hwmgr->device, reg, 0xE0000000);
- cgs_lock_grbm_idx(hwmgr->device, false);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
vega10_didt_set_mask(hwmgr, true);
- cgs_enter_safe_mode(hwmgr->device, false);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
vega10_program_gc_didt_config_registers(hwmgr, PSMGCEDCDroopCtrlConfig_vega10);
@@ -1137,13 +1134,14 @@ static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr)
static int vega10_disable_psm_gc_edc_config(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
uint32_t data;
- cgs_enter_safe_mode(hwmgr->device, true);
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
vega10_didt_set_mask(hwmgr, false);
- cgs_enter_safe_mode(hwmgr->device, false);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
if (PP_CAP(PHM_PlatformCaps_GCEDC)) {
data = 0x00000000;
@@ -1158,15 +1156,14 @@ static int vega10_disable_psm_gc_edc_config(struct pp_hwmgr *hwmgr)
static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
int result;
- cgs_enter_safe_mode(hwmgr->device, true);
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
- cgs_lock_grbm_idx(hwmgr->device, true);
- reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
- cgs_write_register(hwmgr->device, reg, 0xE0000000);
- cgs_lock_grbm_idx(hwmgr->device, false);
+ mutex_lock(&adev->grbm_idx_mutex);
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
result = vega10_program_didt_config_registers(hwmgr, SEEDCForceStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT);
result |= vega10_program_didt_config_registers(hwmgr, SEEDCCtrlForceStallConfig_Vega10, VEGA10_CONFIGREG_DIDT);
@@ -1175,7 +1172,7 @@ static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr)
vega10_didt_set_mask(hwmgr, false);
- cgs_enter_safe_mode(hwmgr->device, false);
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index c61d0744860d..0768d259c07c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -52,7 +52,7 @@ static const void *get_powerplay_table(struct pp_hwmgr *hwmgr)
if (!table_address) {
table_address = (ATOM_Vega10_POWERPLAYTABLE *)
- cgs_atom_get_data_table(hwmgr->device, index,
+ smu_atom_get_data_table(hwmgr->adev, index,
&size, &frev, &crev);
hwmgr->soft_pp_table = table_address; /*Cache the result in RAM.*/
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
index 9f18226a56ea..aa044c1955fe 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
@@ -25,7 +25,7 @@
#include "vega10_hwmgr.h"
#include "vega10_ppsmc.h"
#include "vega10_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
#include "pp_debug.h"
static int vega10_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm)
@@ -89,6 +89,7 @@ int vega10_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr,
int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed)
{
+ struct amdgpu_device *adev = hwmgr->adev;
struct vega10_hwmgr *data = hwmgr->backend;
uint32_t tach_period;
uint32_t crystal_clock_freq;
@@ -100,10 +101,8 @@ int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed)
if (data->smu_features[GNLD_FAN_CONTROL].supported) {
result = vega10_get_current_rpm(hwmgr, speed);
} else {
- uint32_t reg = soc15_get_register_offset(THM_HWID, 0,
- mmCG_TACH_STATUS_BASE_IDX, mmCG_TACH_STATUS);
tach_period =
- CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg),
+ REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_STATUS),
CG_TACH_STATUS,
TACH_PERIOD);
@@ -127,26 +126,23 @@ int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed)
*/
int vega10_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode)
{
- uint32_t reg;
-
- reg = soc15_get_register_offset(THM_HWID, 0,
- mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2);
+ struct amdgpu_device *adev = hwmgr->adev;
if (hwmgr->fan_ctrl_is_in_default_mode) {
hwmgr->fan_ctrl_default_mode =
- CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg),
+ REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
CG_FDO_CTRL2, FDO_PWM_MODE);
hwmgr->tmin =
- CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg),
+ REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
CG_FDO_CTRL2, TMIN);
hwmgr->fan_ctrl_is_in_default_mode = false;
}
- cgs_write_register(hwmgr->device, reg,
- CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+ WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2,
+ REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
CG_FDO_CTRL2, TMIN, 0));
- cgs_write_register(hwmgr->device, reg,
- CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+ WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2,
+ REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
CG_FDO_CTRL2, FDO_PWM_MODE, mode));
return 0;
@@ -159,18 +155,15 @@ int vega10_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode)
*/
int vega10_fan_ctrl_set_default_mode(struct pp_hwmgr *hwmgr)
{
- uint32_t reg;
-
- reg = soc15_get_register_offset(THM_HWID, 0,
- mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2);
+ struct amdgpu_device *adev = hwmgr->adev;
if (!hwmgr->fan_ctrl_is_in_default_mode) {
- cgs_write_register(hwmgr->device, reg,
- CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+ WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2,
+ REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
CG_FDO_CTRL2, FDO_PWM_MODE,
hwmgr->fan_ctrl_default_mode));
- cgs_write_register(hwmgr->device, reg,
- CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+ WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2,
+ REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
CG_FDO_CTRL2, TMIN,
hwmgr->tmin << CG_FDO_CTRL2__TMIN__SHIFT));
hwmgr->fan_ctrl_is_in_default_mode = true;
@@ -257,10 +250,10 @@ int vega10_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr)
int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr,
uint32_t speed)
{
+ struct amdgpu_device *adev = hwmgr->adev;
uint32_t duty100;
uint32_t duty;
uint64_t tmp64;
- uint32_t reg;
if (hwmgr->thermal_controller.fanInfo.bNoFan)
return 0;
@@ -271,10 +264,7 @@ int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr,
if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl))
vega10_fan_ctrl_stop_smc_fan_control(hwmgr);
- reg = soc15_get_register_offset(THM_HWID, 0,
- mmCG_FDO_CTRL1_BASE_IDX, mmCG_FDO_CTRL1);
-
- duty100 = CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg),
+ duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1),
CG_FDO_CTRL1, FMAX_DUTY100);
if (duty100 == 0)
@@ -284,10 +274,8 @@ int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr,
do_div(tmp64, 100);
duty = (uint32_t)tmp64;
- reg = soc15_get_register_offset(THM_HWID, 0,
- mmCG_FDO_CTRL0_BASE_IDX, mmCG_FDO_CTRL0);
- cgs_write_register(hwmgr->device, reg,
- CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+ WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0,
+ REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL0),
CG_FDO_CTRL0, FDO_STATIC_DUTY, duty));
return vega10_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC);
@@ -317,10 +305,10 @@ int vega10_fan_ctrl_reset_fan_speed_to_default(struct pp_hwmgr *hwmgr)
*/
int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed)
{
+ struct amdgpu_device *adev = hwmgr->adev;
uint32_t tach_period;
uint32_t crystal_clock_freq;
int result = 0;
- uint32_t reg;
if (hwmgr->thermal_controller.fanInfo.bNoFan ||
(speed < hwmgr->thermal_controller.fanInfo.ulMinRPM) ||
@@ -333,10 +321,8 @@ int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed)
if (!result) {
crystal_clock_freq = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev);
tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed);
- reg = soc15_get_register_offset(THM_HWID, 0,
- mmCG_TACH_STATUS_BASE_IDX, mmCG_TACH_STATUS);
- cgs_write_register(hwmgr->device, reg,
- CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+ WREG32_SOC15(THM, 0, mmCG_TACH_STATUS,
+ REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_STATUS),
CG_TACH_STATUS, TACH_PERIOD,
tach_period));
}
@@ -350,13 +336,10 @@ int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed)
*/
int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
int temp;
- uint32_t reg;
- reg = soc15_get_register_offset(THM_HWID, 0,
- mmCG_MULT_THERMAL_STATUS_BASE_IDX, mmCG_MULT_THERMAL_STATUS);
-
- temp = cgs_read_register(hwmgr->device, reg);
+ temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS);
temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
@@ -379,11 +362,12 @@ int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr)
static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr,
struct PP_TemperatureRange *range)
{
+ struct amdgpu_device *adev = hwmgr->adev;
int low = VEGA10_THERMAL_MINIMUM_ALERT_TEMP *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
int high = VEGA10_THERMAL_MAXIMUM_ALERT_TEMP *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
- uint32_t val, reg;
+ uint32_t val;
if (low < range->min)
low = range->min;
@@ -393,20 +377,17 @@ static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr,
if (low > high)
return -EINVAL;
- reg = soc15_get_register_offset(THM_HWID, 0,
- mmTHM_THERMAL_INT_CTRL_BASE_IDX, mmTHM_THERMAL_INT_CTRL);
-
- val = cgs_read_register(hwmgr->device, reg);
+ val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL);
- val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5);
- val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1);
- val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
- val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
+ val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5);
+ val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1);
+ val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
+ val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
val &= (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK) &
(~THM_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK) &
(~THM_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK);
- cgs_write_register(hwmgr->device, reg, val);
+ WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val);
return 0;
}
@@ -418,21 +399,17 @@ static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr,
*/
static int vega10_thermal_initialize(struct pp_hwmgr *hwmgr)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
if (hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution) {
- reg = soc15_get_register_offset(THM_HWID, 0,
- mmCG_TACH_CTRL_BASE_IDX, mmCG_TACH_CTRL);
- cgs_write_register(hwmgr->device, reg,
- CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+ WREG32_SOC15(THM, 0, mmCG_TACH_CTRL,
+ REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL),
CG_TACH_CTRL, EDGE_PER_REV,
hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution - 1));
}
- reg = soc15_get_register_offset(THM_HWID, 0,
- mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2);
- cgs_write_register(hwmgr->device, reg,
- CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+ WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2,
+ REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
CG_FDO_CTRL2, TACH_PWM_RESP_RATE, 0x28));
return 0;
@@ -445,9 +422,9 @@ static int vega10_thermal_initialize(struct pp_hwmgr *hwmgr)
*/
static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
struct vega10_hwmgr *data = hwmgr->backend;
uint32_t val = 0;
- uint32_t reg;
if (data->smu_features[GNLD_FW_CTF].supported) {
if (data->smu_features[GNLD_FW_CTF].enabled)
@@ -465,8 +442,7 @@ static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr)
val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT);
val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT);
- reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
- cgs_write_register(hwmgr->device, reg, val);
+ WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, val);
return 0;
}
@@ -477,8 +453,8 @@ static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr)
*/
int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
struct vega10_hwmgr *data = hwmgr->backend;
- uint32_t reg;
if (data->smu_features[GNLD_FW_CTF].supported) {
if (!data->smu_features[GNLD_FW_CTF].enabled)
@@ -493,8 +469,7 @@ int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr)
data->smu_features[GNLD_FW_CTF].enabled = false;
}
- reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
- cgs_write_register(hwmgr->device, reg, 0);
+ WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 200de46bd06b..782e2098824d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -34,7 +34,6 @@
#include "atomfirmware.h"
#include "cgs_common.h"
#include "vega12_inc.h"
-#include "pp_soc15.h"
#include "pppcielanes.h"
#include "vega12_hwmgr.h"
#include "vega12_processpptables.h"
@@ -546,6 +545,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
return -EINVAL);
dpm_table->dpm_levels[i].value = clock;
+ dpm_table->dpm_levels[i].enabled = true;
}
vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -565,6 +565,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
return -EINVAL);
dpm_table->dpm_levels[i].value = clock;
+ dpm_table->dpm_levels[i].enabled = true;
}
vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -585,6 +586,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
return -EINVAL);
dpm_table->dpm_levels[i].value = clock;
+ dpm_table->dpm_levels[i].enabled = true;
}
vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -605,6 +607,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
return -EINVAL);
dpm_table->dpm_levels[i].value = clock;
+ dpm_table->dpm_levels[i].enabled = true;
}
vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -625,6 +628,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
return -EINVAL);
dpm_table->dpm_levels[i].value = clock;
+ dpm_table->dpm_levels[i].enabled = true;
}
vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -645,6 +649,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
return -EINVAL);
dpm_table->dpm_levels[i].value = clock;
+ dpm_table->dpm_levels[i].enabled = true;
}
vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -666,6 +671,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
return -EINVAL);
dpm_table->dpm_levels[i].value = clock;
+ dpm_table->dpm_levels[i].enabled = true;
}
vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -686,6 +692,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
return -EINVAL);
dpm_table->dpm_levels[i].value = clock;
+ dpm_table->dpm_levels[i].enabled = true;
}
vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -706,6 +713,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
return -EINVAL);
dpm_table->dpm_levels[i].value = clock;
+ dpm_table->dpm_levels[i].enabled = true;
}
vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -726,6 +734,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
return -EINVAL);
dpm_table->dpm_levels[i].value = clock;
+ dpm_table->dpm_levels[i].enabled = true;
}
vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -992,15 +1001,55 @@ static uint32_t vega12_find_highest_dpm_level(
static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
{
+ struct vega12_hwmgr *data = hwmgr->backend;
+ if (data->smc_state_table.gfx_boot_level !=
+ data->dpm_table.gfx_table.dpm_state.soft_min_level) {
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetSoftMinByFreq,
+ PPCLK_GFXCLK<<16 | data->dpm_table.gfx_table.dpm_levels[data->smc_state_table.gfx_boot_level].value);
+ data->dpm_table.gfx_table.dpm_state.soft_min_level =
+ data->smc_state_table.gfx_boot_level;
+ }
+
+ if (data->smc_state_table.mem_boot_level !=
+ data->dpm_table.mem_table.dpm_state.soft_min_level) {
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetSoftMinByFreq,
+ PPCLK_UCLK<<16 | data->dpm_table.mem_table.dpm_levels[data->smc_state_table.mem_boot_level].value);
+ data->dpm_table.mem_table.dpm_state.soft_min_level =
+ data->smc_state_table.mem_boot_level;
+ }
+
return 0;
+
}
static int vega12_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
{
+ struct vega12_hwmgr *data = hwmgr->backend;
+ if (data->smc_state_table.gfx_max_level !=
+ data->dpm_table.gfx_table.dpm_state.soft_max_level) {
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetSoftMaxByFreq,
+ /* plus the vale by 1 to align the resolution */
+ PPCLK_GFXCLK<<16 | (data->dpm_table.gfx_table.dpm_levels[data->smc_state_table.gfx_max_level].value + 1));
+ data->dpm_table.gfx_table.dpm_state.soft_max_level =
+ data->smc_state_table.gfx_max_level;
+ }
+
+ if (data->smc_state_table.mem_max_level !=
+ data->dpm_table.mem_table.dpm_state.soft_max_level) {
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetSoftMaxByFreq,
+ /* plus the vale by 1 to align the resolution */
+ PPCLK_UCLK<<16 | (data->dpm_table.mem_table.dpm_levels[data->smc_state_table.mem_max_level].value + 1));
+ data->dpm_table.mem_table.dpm_state.soft_max_level =
+ data->smc_state_table.mem_max_level;
+ }
+
return 0;
}
-
int vega12_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable)
{
struct vega12_hwmgr *data =
@@ -1064,8 +1113,7 @@ static uint32_t vega12_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low)
return (mem_clk * 100);
}
-static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr,
- struct pp_gpu_power *query)
+static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query)
{
#if 0
uint32_t value;
@@ -1077,7 +1125,7 @@ static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr,
vega12_read_arg_from_smc(hwmgr, &value);
/* power value is an integer */
- query->average_gpu_power = value << 8;
+ *query = value << 8;
#endif
return 0;
}
@@ -1186,12 +1234,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*size = 4;
break;
case AMDGPU_PP_SENSOR_GPU_POWER:
- if (*size < sizeof(struct pp_gpu_power))
- ret = -EINVAL;
- else {
- *size = sizeof(struct pp_gpu_power);
- ret = vega12_get_gpu_power(hwmgr, (struct pp_gpu_power *)value);
- }
+ ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value);
+
break;
default:
ret = -EINVAL;
@@ -1260,23 +1304,18 @@ static int vega12_notify_smc_display_config_after_ps_adjustment(
{
struct vega12_hwmgr *data =
(struct vega12_hwmgr *)(hwmgr->backend);
- uint32_t num_active_disps = 0;
- struct cgs_display_info info = {0};
struct PP_Clocks min_clocks = {0};
struct pp_display_clock_request clock_req;
uint32_t clk_request;
- info.mode_info = NULL;
- cgs_get_active_displays_info(hwmgr->device, &info);
- num_active_disps = info.display_count;
- if (num_active_disps > 1)
+ if (hwmgr->display_config->num_display > 1)
vega12_notify_smc_display_change(hwmgr, false);
else
vega12_notify_smc_display_change(hwmgr, true);
- min_clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk;
- min_clocks.dcefClockInSR = hwmgr->display_config.min_dcef_deep_sleep_set_clk;
- min_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
+ min_clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk;
+ min_clocks.dcefClockInSR = hwmgr->display_config->min_dcef_deep_sleep_set_clk;
+ min_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock;
if (data->smu_features[GNLD_DPM_DCEFCLK].supported) {
clock_req.clock_type = amd_pp_dcef_clock;
@@ -1832,9 +1871,7 @@ static int vega12_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
{
struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
int result = 0;
- uint32_t num_turned_on_displays = 1;
Watermarks_t *wm_table = &(data->smc_state_table.water_marks_table);
- struct cgs_display_info info = {0};
if ((data->water_marks_bitmap & WaterMarksExist) &&
!(data->water_marks_bitmap & WaterMarksLoaded)) {
@@ -1846,12 +1883,9 @@ static int vega12_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
if ((data->water_marks_bitmap & WaterMarksExist) &&
data->smu_features[GNLD_DPM_DCEFCLK].supported &&
- data->smu_features[GNLD_DPM_SOCCLK].supported) {
- cgs_get_active_displays_info(hwmgr->device, &info);
- num_turned_on_displays = info.display_count;
+ data->smu_features[GNLD_DPM_SOCCLK].supported)
smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_NumOfDisplays, num_turned_on_displays);
- }
+ PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display);
return result;
}
@@ -1894,15 +1928,12 @@ vega12_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmg
{
struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
bool is_update_required = false;
- struct cgs_display_info info = {0, 0, NULL};
-
- cgs_get_active_displays_info(hwmgr->device, &info);
- if (data->display_timing.num_existing_displays != info.display_count)
+ if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
is_update_required = true;
if (data->registry_data.gfx_clk_deep_sleep_support) {
- if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr)
+ if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr)
is_update_required = true;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
index bc98b1df3b65..e81ded1ec198 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
@@ -33,7 +33,7 @@
#define WaterMarksExist 1
#define WaterMarksLoaded 2
-#define VG12_PSUEDO_NUM_GFXCLK_DPM_LEVELS 8
+#define VG12_PSUEDO_NUM_GFXCLK_DPM_LEVELS 16
#define VG12_PSUEDO_NUM_SOCCLK_DPM_LEVELS 8
#define VG12_PSUEDO_NUM_DCEFCLK_DPM_LEVELS 8
#define VG12_PSUEDO_NUM_UCLK_DPM_LEVELS 4
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
index b34113f45904..888ddca902d8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
@@ -51,7 +51,7 @@ static const void *get_powerplay_table(struct pp_hwmgr *hwmgr)
if (!table_address) {
table_address = (ATOM_Vega12_POWERPLAYTABLE *)
- cgs_atom_get_data_table(hwmgr->device, index,
+ smu_atom_get_data_table(hwmgr->adev, index,
&size, &frev, &crev);
hwmgr->soft_pp_table = table_address; /*Cache the result in RAM.*/
@@ -224,6 +224,11 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable
ppsmc_pptable->AcgGfxclkSpreadPercent = smc_dpm_table.acggfxclkspreadpercent;
ppsmc_pptable->AcgGfxclkSpreadFreq = smc_dpm_table.acggfxclkspreadfreq;
+ /* 0xFFFF will disable the ACG feature */
+ if (!(hwmgr->feature_mask & PP_ACG_MASK)) {
+ ppsmc_pptable->AcgThresholdFreqHigh = 0xFFFF;
+ ppsmc_pptable->AcgThresholdFreqLow = 0xFFFF;
+ }
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c
index df0fa815cd6e..cfd9e6ccb790 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c
@@ -26,7 +26,7 @@
#include "vega12_smumgr.h"
#include "vega12_ppsmc.h"
#include "vega12_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
#include "pp_debug.h"
static int vega12_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm)
@@ -147,13 +147,10 @@ int vega12_fan_ctrl_reset_fan_speed_to_default(struct pp_hwmgr *hwmgr)
*/
int vega12_thermal_get_temperature(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
int temp = 0;
- uint32_t reg;
- reg = soc15_get_register_offset(THM_HWID, 0,
- mmCG_MULT_THERMAL_STATUS_BASE_IDX, mmCG_MULT_THERMAL_STATUS);
-
- temp = cgs_read_register(hwmgr->device, reg);
+ temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS);
temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
@@ -175,11 +172,12 @@ int vega12_thermal_get_temperature(struct pp_hwmgr *hwmgr)
static int vega12_thermal_set_temperature_range(struct pp_hwmgr *hwmgr,
struct PP_TemperatureRange *range)
{
+ struct amdgpu_device *adev = hwmgr->adev;
int low = VEGA12_THERMAL_MINIMUM_ALERT_TEMP *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
int high = VEGA12_THERMAL_MAXIMUM_ALERT_TEMP *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
- uint32_t val, reg;
+ uint32_t val;
if (low < range->min)
low = range->min;
@@ -189,18 +187,15 @@ static int vega12_thermal_set_temperature_range(struct pp_hwmgr *hwmgr,
if (low > high)
return -EINVAL;
- reg = soc15_get_register_offset(THM_HWID, 0,
- mmTHM_THERMAL_INT_CTRL_BASE_IDX, mmTHM_THERMAL_INT_CTRL);
-
- val = cgs_read_register(hwmgr->device, reg);
+ val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL);
- val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5);
- val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1);
- val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
- val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
+ val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5);
+ val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1);
+ val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
+ val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
- cgs_write_register(hwmgr->device, reg, val);
+ WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val);
return 0;
}
@@ -212,15 +207,14 @@ static int vega12_thermal_set_temperature_range(struct pp_hwmgr *hwmgr,
*/
static int vega12_thermal_enable_alert(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
uint32_t val = 0;
- uint32_t reg;
val |= (1 << THM_THERMAL_INT_ENA__THERM_INTH_CLR__SHIFT);
val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT);
val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT);
- reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
- cgs_write_register(hwmgr->device, reg, val);
+ WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, val);
return 0;
}
@@ -231,10 +225,9 @@ static int vega12_thermal_enable_alert(struct pp_hwmgr *hwmgr)
*/
int vega12_thermal_disable_alert(struct pp_hwmgr *hwmgr)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
- reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
- cgs_write_register(hwmgr->device, reg, 0);
+ WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 8b78bbecd1bc..a202247c9894 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -377,11 +377,7 @@ struct phm_clocks {
#define DPMTABLE_UPDATE_SCLK 0x00000004
#define DPMTABLE_UPDATE_MCLK 0x00000008
#define DPMTABLE_OD_UPDATE_VDDC 0x00000010
-
-/* To determine if sclk and mclk are in overdrive state */
-#define SCLK_OVERDRIVE_ENABLED 0x00000001
-#define MCLK_OVERDRIVE_ENABLED 0x00000002
-#define VDDC_OVERDRIVE_ENABLED 0x00000010
+#define DPMTABLE_UPDATE_SOCCLK 0x00000020
struct phm_odn_performance_level {
uint32_t clock;
@@ -414,7 +410,10 @@ extern int phm_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
struct pp_power_state *adjusted_ps,
const struct pp_power_state *current_ps);
+extern int phm_apply_clock_adjust_rules(struct pp_hwmgr *hwmgr);
+
extern int phm_force_dpm_levels(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level);
+extern int phm_pre_display_configuration_changed(struct pp_hwmgr *hwmgr);
extern int phm_display_configuration_changed(struct pp_hwmgr *hwmgr);
extern int phm_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr);
extern int phm_register_irq_handlers(struct pp_hwmgr *hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 17f811d181c8..b99fb8ac822c 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -38,6 +38,8 @@ struct phm_fan_speed_info;
struct pp_atomctrl_voltage_table;
#define VOLTAGE_SCALE 4
+#define VOLTAGE_VID_OFFSET_SCALE1 625
+#define VOLTAGE_VID_OFFSET_SCALE2 100
enum DISPLAY_GAP {
DISPLAY_GAP_VBLANK_OR_WM = 0, /* Wait for vblank or MCHG watermark. */
@@ -64,24 +66,6 @@ struct vi_dpm_table {
#define PCIE_PERF_REQ_GEN2 3
#define PCIE_PERF_REQ_GEN3 4
-enum PP_FEATURE_MASK {
- PP_SCLK_DPM_MASK = 0x1,
- PP_MCLK_DPM_MASK = 0x2,
- PP_PCIE_DPM_MASK = 0x4,
- PP_SCLK_DEEP_SLEEP_MASK = 0x8,
- PP_POWER_CONTAINMENT_MASK = 0x10,
- PP_UVD_HANDSHAKE_MASK = 0x20,
- PP_SMC_VOLTAGE_CONTROL_MASK = 0x40,
- PP_VBI_TIME_SUPPORT_MASK = 0x80,
- PP_ULV_MASK = 0x100,
- PP_ENABLE_GFX_CG_THRU_SMU = 0x200,
- PP_CLOCK_STRETCH_MASK = 0x400,
- PP_OD_FUZZY_FAN_CONTROL_MASK = 0x800,
- PP_SOCCLK_DPM_MASK = 0x1000,
- PP_DCEFCLK_DPM_MASK = 0x2000,
- PP_OVERDRIVE_MASK = 0x4000,
-};
-
enum PHM_BackEnd_Magic {
PHM_Dummy_Magic = 0xAA5555AA,
PHM_RV770_Magic = 0xDCBAABCD,
@@ -245,6 +229,8 @@ struct pp_hwmgr_func {
struct pp_power_state *prequest_ps,
const struct pp_power_state *pcurrent_ps);
+ int (*apply_clocks_adjust_rules)(struct pp_hwmgr *hwmgr);
+
int (*force_dpm_level)(struct pp_hwmgr *hw_mgr,
enum amd_dpm_forced_level level);
@@ -268,6 +254,7 @@ struct pp_hwmgr_func {
const void *state);
int (*enable_clock_power_gating)(struct pp_hwmgr *hwmgr);
int (*notify_smc_display_config_after_ps_adjustment)(struct pp_hwmgr *hwmgr);
+ int (*pre_display_config_changed)(struct pp_hwmgr *hwmgr);
int (*display_config_changed)(struct pp_hwmgr *hwmgr);
int (*disable_clock_power_gating)(struct pp_hwmgr *hwmgr);
int (*update_clock_gatings)(struct pp_hwmgr *hwmgr,
@@ -312,6 +299,7 @@ struct pp_hwmgr_func {
int (*display_clock_voltage_request)(struct pp_hwmgr *hwmgr,
struct pp_display_clock_request *clock);
int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
+ int (*gfx_off_control)(struct pp_hwmgr *hwmgr, bool enable);
int (*power_off_asic)(struct pp_hwmgr *hwmgr);
int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask);
int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf);
@@ -341,6 +329,7 @@ struct pp_hwmgr_func {
long *input, uint32_t size);
int (*set_power_limit)(struct pp_hwmgr *hwmgr, uint32_t n);
int (*set_mmhub_powergating_by_smu)(struct pp_hwmgr *hwmgr);
+ int (*smus_notify_pwe)(struct pp_hwmgr *hwmgr);
};
struct pp_table_func {
@@ -718,6 +707,7 @@ struct pp_hwmgr {
uint32_t chip_family;
uint32_t chip_id;
uint32_t smu_version;
+ bool not_vf;
bool pm_en;
struct mutex smu_lock;
@@ -764,7 +754,7 @@ struct pp_hwmgr {
struct pp_power_state *request_ps;
struct pp_power_state *boot_ps;
struct pp_power_state *uvd_ps;
- struct amd_pp_display_configuration display_config;
+ const struct amd_pp_display_configuration *display_config;
uint32_t feature_mask;
bool avfs_supported;
/* UMD Pstate */
@@ -782,10 +772,13 @@ struct pp_hwmgr {
};
int hwmgr_early_init(struct pp_hwmgr *hwmgr);
+int hwmgr_sw_init(struct pp_hwmgr *hwmgr);
+int hwmgr_sw_fini(struct pp_hwmgr *hwmgr);
int hwmgr_hw_init(struct pp_hwmgr *hwmgr);
int hwmgr_hw_fini(struct pp_hwmgr *hwmgr);
-int hwmgr_hw_suspend(struct pp_hwmgr *hwmgr);
-int hwmgr_hw_resume(struct pp_hwmgr *hwmgr);
+int hwmgr_suspend(struct pp_hwmgr *hwmgr);
+int hwmgr_resume(struct pp_hwmgr *hwmgr);
+
int hwmgr_handle_task(struct pp_hwmgr *hwmgr,
enum amd_pp_task task_id,
enum amd_pm_state_type *user_state);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
index 426bff2aad2b..a2991fa2e6f8 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
@@ -75,13 +75,15 @@
#define PPSMC_MSG_GetMinGfxclkFrequency 0x2C
#define PPSMC_MSG_GetMaxGfxclkFrequency 0x2D
#define PPSMC_MSG_SoftReset 0x2E
+#define PPSMC_MSG_SetGfxCGPG 0x2F
#define PPSMC_MSG_SetSoftMaxGfxClk 0x30
#define PPSMC_MSG_SetHardMinGfxClk 0x31
#define PPSMC_MSG_SetSoftMaxSocclkByFreq 0x32
#define PPSMC_MSG_SetSoftMaxFclkByFreq 0x33
#define PPSMC_MSG_SetSoftMaxVcn 0x34
#define PPSMC_MSG_PowerGateMmHub 0x35
-#define PPSMC_Message_Count 0x36
+#define PPSMC_MSG_SetRccPfcPmeRestoreRegister 0x36
+#define PPSMC_Message_Count 0x37
typedef uint16_t PPSMC_Result;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu75.h b/drivers/gpu/drm/amd/powerplay/inc/smu75.h
new file mode 100644
index 000000000000..771523001533
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu75.h
@@ -0,0 +1,760 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef SMU75_H
+#define SMU75_H
+
+#pragma pack(push, 1)
+
+typedef struct {
+ uint32_t high;
+ uint32_t low;
+} data_64_t;
+
+typedef struct {
+ data_64_t high;
+ data_64_t low;
+} data_128_t;
+
+#define SMU__DGPU_ONLY
+
+#define SMU__NUM_SCLK_DPM_STATE 8
+#define SMU__NUM_MCLK_DPM_LEVELS 4
+#define SMU__NUM_LCLK_DPM_LEVELS 8
+#define SMU__NUM_PCIE_DPM_LEVELS 8
+
+#define SMU7_CONTEXT_ID_SMC 1
+#define SMU7_CONTEXT_ID_VBIOS 2
+
+#define SMU75_MAX_LEVELS_VDDC 16
+#define SMU75_MAX_LEVELS_VDDGFX 16
+#define SMU75_MAX_LEVELS_VDDCI 8
+#define SMU75_MAX_LEVELS_MVDD 4
+
+#define SMU_MAX_SMIO_LEVELS 4
+
+#define SMU75_MAX_LEVELS_GRAPHICS SMU__NUM_SCLK_DPM_STATE
+#define SMU75_MAX_LEVELS_MEMORY SMU__NUM_MCLK_DPM_LEVELS
+#define SMU75_MAX_LEVELS_GIO SMU__NUM_LCLK_DPM_LEVELS
+#define SMU75_MAX_LEVELS_LINK SMU__NUM_PCIE_DPM_LEVELS
+#define SMU75_MAX_LEVELS_UVD 8
+#define SMU75_MAX_LEVELS_VCE 8
+#define SMU75_MAX_LEVELS_ACP 8
+#define SMU75_MAX_LEVELS_SAMU 8
+#define SMU75_MAX_ENTRIES_SMIO 32
+
+#define DPM_NO_LIMIT 0
+#define DPM_NO_UP 1
+#define DPM_GO_DOWN 2
+#define DPM_GO_UP 3
+
+#define SMU7_FIRST_DPM_GRAPHICS_LEVEL 0
+#define SMU7_FIRST_DPM_MEMORY_LEVEL 0
+
+#define GPIO_CLAMP_MODE_VRHOT 1
+#define GPIO_CLAMP_MODE_THERM 2
+#define GPIO_CLAMP_MODE_DC 4
+
+#define SCRATCH_B_TARG_PCIE_INDEX_SHIFT 0
+#define SCRATCH_B_TARG_PCIE_INDEX_MASK (0x7<<SCRATCH_B_TARG_PCIE_INDEX_SHIFT)
+#define SCRATCH_B_CURR_PCIE_INDEX_SHIFT 3
+#define SCRATCH_B_CURR_PCIE_INDEX_MASK (0x7<<SCRATCH_B_CURR_PCIE_INDEX_SHIFT)
+#define SCRATCH_B_TARG_UVD_INDEX_SHIFT 6
+#define SCRATCH_B_TARG_UVD_INDEX_MASK (0x7<<SCRATCH_B_TARG_UVD_INDEX_SHIFT)
+#define SCRATCH_B_CURR_UVD_INDEX_SHIFT 9
+#define SCRATCH_B_CURR_UVD_INDEX_MASK (0x7<<SCRATCH_B_CURR_UVD_INDEX_SHIFT)
+#define SCRATCH_B_TARG_VCE_INDEX_SHIFT 12
+#define SCRATCH_B_TARG_VCE_INDEX_MASK (0x7<<SCRATCH_B_TARG_VCE_INDEX_SHIFT)
+#define SCRATCH_B_CURR_VCE_INDEX_SHIFT 15
+#define SCRATCH_B_CURR_VCE_INDEX_MASK (0x7<<SCRATCH_B_CURR_VCE_INDEX_SHIFT)
+#define SCRATCH_B_TARG_ACP_INDEX_SHIFT 18
+#define SCRATCH_B_TARG_ACP_INDEX_MASK (0x7<<SCRATCH_B_TARG_ACP_INDEX_SHIFT)
+#define SCRATCH_B_CURR_ACP_INDEX_SHIFT 21
+#define SCRATCH_B_CURR_ACP_INDEX_MASK (0x7<<SCRATCH_B_CURR_ACP_INDEX_SHIFT)
+#define SCRATCH_B_TARG_SAMU_INDEX_SHIFT 24
+#define SCRATCH_B_TARG_SAMU_INDEX_MASK (0x7<<SCRATCH_B_TARG_SAMU_INDEX_SHIFT)
+#define SCRATCH_B_CURR_SAMU_INDEX_SHIFT 27
+#define SCRATCH_B_CURR_SAMU_INDEX_MASK (0x7<<SCRATCH_B_CURR_SAMU_INDEX_SHIFT)
+
+/* Virtualization Defines */
+#define CG_XDMA_MASK 0x1
+#define CG_XDMA_SHIFT 0
+#define CG_UVD_MASK 0x2
+#define CG_UVD_SHIFT 1
+#define CG_VCE_MASK 0x4
+#define CG_VCE_SHIFT 2
+#define CG_SAMU_MASK 0x8
+#define CG_SAMU_SHIFT 3
+#define CG_GFX_MASK 0x10
+#define CG_GFX_SHIFT 4
+#define CG_SDMA_MASK 0x20
+#define CG_SDMA_SHIFT 5
+#define CG_HDP_MASK 0x40
+#define CG_HDP_SHIFT 6
+#define CG_MC_MASK 0x80
+#define CG_MC_SHIFT 7
+#define CG_DRM_MASK 0x100
+#define CG_DRM_SHIFT 8
+#define CG_ROM_MASK 0x200
+#define CG_ROM_SHIFT 9
+#define CG_BIF_MASK 0x400
+#define CG_BIF_SHIFT 10
+
+#if defined SMU__DGPU_ONLY
+#define SMU75_DTE_ITERATIONS 5
+#define SMU75_DTE_SOURCES 3
+#define SMU75_DTE_SINKS 1
+#define SMU75_NUM_CPU_TES 0
+#define SMU75_NUM_GPU_TES 1
+#define SMU75_NUM_NON_TES 2
+#define SMU75_DTE_FAN_SCALAR_MIN 0x100
+#define SMU75_DTE_FAN_SCALAR_MAX 0x166
+#define SMU75_DTE_FAN_TEMP_MAX 93
+#define SMU75_DTE_FAN_TEMP_MIN 83
+#endif
+#define SMU75_THERMAL_INPUT_LOOP_COUNT 2
+#define SMU75_THERMAL_CLAMP_MODE_COUNT 2
+
+#define EXP_M1_1 93
+#define EXP_M2_1 195759
+#define EXP_B_1 111176531
+
+#define EXP_M1_2 67
+#define EXP_M2_2 153720
+#define EXP_B_2 94415767
+
+#define EXP_M1_3 48
+#define EXP_M2_3 119796
+#define EXP_B_3 79195279
+
+#define EXP_M1_4 550
+#define EXP_M2_4 1484190
+#define EXP_B_4 1051432828
+
+#define EXP_M1_5 394
+#define EXP_M2_5 1143049
+#define EXP_B_5 864288432
+
+struct SMU7_HystController_Data {
+ uint16_t waterfall_up;
+ uint16_t waterfall_down;
+ uint16_t waterfall_limit;
+ uint16_t release_cnt;
+ uint16_t release_limit;
+ uint16_t spare;
+};
+
+typedef struct SMU7_HystController_Data SMU7_HystController_Data;
+
+struct SMU75_PIDController {
+ uint32_t Ki;
+ int32_t LFWindupUpperLim;
+ int32_t LFWindupLowerLim;
+ uint32_t StatePrecision;
+ uint32_t LfPrecision;
+ uint32_t LfOffset;
+ uint32_t MaxState;
+ uint32_t MaxLfFraction;
+ uint32_t StateShift;
+};
+
+typedef struct SMU75_PIDController SMU75_PIDController;
+
+struct SMU7_LocalDpmScoreboard {
+ uint32_t PercentageBusy;
+
+ int32_t PIDError;
+ int32_t PIDIntegral;
+ int32_t PIDOutput;
+
+ uint32_t SigmaDeltaAccum;
+ uint32_t SigmaDeltaOutput;
+ uint32_t SigmaDeltaLevel;
+
+ uint32_t UtilizationSetpoint;
+
+ uint8_t TdpClampMode;
+ uint8_t TdcClampMode;
+ uint8_t ThermClampMode;
+ uint8_t VoltageBusy;
+
+ int8_t CurrLevel;
+ int8_t TargLevel;
+ uint8_t LevelChangeInProgress;
+ uint8_t UpHyst;
+
+ uint8_t DownHyst;
+ uint8_t VoltageDownHyst;
+ uint8_t DpmEnable;
+ uint8_t DpmRunning;
+
+ uint8_t DpmForce;
+ uint8_t DpmForceLevel;
+ uint8_t DisplayWatermark;
+ uint8_t McArbIndex;
+
+ uint32_t MinimumPerfSclk;
+
+ uint8_t AcpiReq;
+ uint8_t AcpiAck;
+ uint8_t GfxClkSlow;
+ uint8_t GpioClampMode;
+
+ uint8_t EnableModeSwitchRLCNotification;
+ uint8_t EnabledLevelsChange;
+ uint8_t DteClampMode;
+ uint8_t FpsClampMode;
+
+ uint16_t LevelResidencyCounters [SMU75_MAX_LEVELS_GRAPHICS];
+ uint16_t LevelSwitchCounters [SMU75_MAX_LEVELS_GRAPHICS];
+
+ void (*TargetStateCalculator)(uint8_t);
+ void (*SavedTargetStateCalculator)(uint8_t);
+
+ uint16_t AutoDpmInterval;
+ uint16_t AutoDpmRange;
+
+ uint8_t FpsEnabled;
+ uint8_t MaxPerfLevel;
+ uint8_t AllowLowClkInterruptToHost;
+ uint8_t FpsRunning;
+
+ uint32_t MaxAllowedFrequency;
+
+ uint32_t FilteredSclkFrequency;
+ uint32_t LastSclkFrequency;
+ uint32_t FilteredSclkFrequencyCnt;
+
+ uint8_t MinPerfLevel;
+#ifdef SMU__FIRMWARE_SCKS_PRESENT__1
+ uint8_t ScksClampMode;
+ uint8_t padding[2];
+#else
+ uint8_t padding[3];
+#endif
+
+ uint16_t FpsAlpha;
+ uint16_t DeltaTime;
+ uint32_t CurrentFps;
+ uint32_t FilteredFps;
+ uint32_t FrameCount;
+ uint32_t FrameCountLast;
+ uint16_t FpsTargetScalar;
+ uint16_t FpsWaterfallLimitScalar;
+ uint16_t FpsAlphaScalar;
+ uint16_t spare8;
+ SMU7_HystController_Data HystControllerData;
+};
+
+typedef struct SMU7_LocalDpmScoreboard SMU7_LocalDpmScoreboard;
+
+#define SMU7_MAX_VOLTAGE_CLIENTS 12
+
+typedef uint8_t (*VoltageChangeHandler_t)(uint16_t, uint8_t);
+
+#define VDDC_MASK 0x00007FFF
+#define VDDC_SHIFT 0
+#define VDDCI_MASK 0x3FFF8000
+#define VDDCI_SHIFT 15
+#define PHASES_MASK 0xC0000000
+#define PHASES_SHIFT 30
+
+typedef uint32_t SMU_VoltageLevel;
+
+struct SMU7_VoltageScoreboard {
+ SMU_VoltageLevel TargetVoltage;
+ uint16_t MaxVid;
+ uint8_t HighestVidOffset;
+ uint8_t CurrentVidOffset;
+
+ uint16_t CurrentVddc;
+ uint16_t CurrentVddci;
+
+ uint8_t ControllerBusy;
+ uint8_t CurrentVid;
+ uint8_t CurrentVddciVid;
+ uint8_t padding;
+
+ SMU_VoltageLevel RequestedVoltage[SMU7_MAX_VOLTAGE_CLIENTS];
+ SMU_VoltageLevel TargetVoltageState;
+ uint8_t EnabledRequest[SMU7_MAX_VOLTAGE_CLIENTS];
+
+ uint8_t padding2;
+ uint8_t padding3;
+ uint8_t ControllerEnable;
+ uint8_t ControllerRunning;
+ uint16_t CurrentStdVoltageHiSidd;
+ uint16_t CurrentStdVoltageLoSidd;
+ uint8_t OverrideVoltage;
+ uint8_t padding4;
+ uint8_t padding5;
+ uint8_t CurrentPhases;
+
+ VoltageChangeHandler_t ChangeVddc;
+ VoltageChangeHandler_t ChangeVddci;
+ VoltageChangeHandler_t ChangePhase;
+ VoltageChangeHandler_t ChangeMvdd;
+
+ VoltageChangeHandler_t functionLinks[6];
+
+ uint16_t * VddcFollower1;
+ int16_t Driver_OD_RequestedVidOffset1;
+ int16_t Driver_OD_RequestedVidOffset2;
+};
+
+typedef struct SMU7_VoltageScoreboard SMU7_VoltageScoreboard;
+
+#define SMU7_MAX_PCIE_LINK_SPEEDS 3
+
+struct SMU7_PCIeLinkSpeedScoreboard {
+ uint8_t DpmEnable;
+ uint8_t DpmRunning;
+ uint8_t DpmForce;
+ uint8_t DpmForceLevel;
+
+ uint8_t CurrentLinkSpeed;
+ uint8_t EnabledLevelsChange;
+ uint16_t AutoDpmInterval;
+
+ uint16_t AutoDpmRange;
+ uint16_t AutoDpmCount;
+
+ uint8_t DpmMode;
+ uint8_t AcpiReq;
+ uint8_t AcpiAck;
+ uint8_t CurrentLinkLevel;
+};
+
+typedef struct SMU7_PCIeLinkSpeedScoreboard SMU7_PCIeLinkSpeedScoreboard;
+
+#define SMU7_LKGE_LUT_NUM_OF_TEMP_ENTRIES 16
+#define SMU7_LKGE_LUT_NUM_OF_VOLT_ENTRIES 16
+
+#define SMU7_SCALE_I 7
+#define SMU7_SCALE_R 12
+
+struct SMU7_PowerScoreboard {
+ uint32_t GpuPower;
+
+ uint32_t VddcPower;
+ uint32_t VddcVoltage;
+ uint32_t VddcCurrent;
+
+ uint32_t VddciPower;
+ uint32_t VddciVoltage;
+ uint32_t VddciCurrent;
+
+ uint32_t RocPower;
+
+ uint16_t Telemetry_1_slope;
+ uint16_t Telemetry_2_slope;
+ int32_t Telemetry_1_offset;
+ int32_t Telemetry_2_offset;
+
+ uint8_t MCLK_patch_flag;
+ uint8_t reserved[3];
+};
+
+typedef struct SMU7_PowerScoreboard SMU7_PowerScoreboard;
+
+#define SMU7_SCLK_DPM_CONFIG_MASK 0x01
+#define SMU7_VOLTAGE_CONTROLLER_CONFIG_MASK 0x02
+#define SMU7_THERMAL_CONTROLLER_CONFIG_MASK 0x04
+#define SMU7_MCLK_DPM_CONFIG_MASK 0x08
+#define SMU7_UVD_DPM_CONFIG_MASK 0x10
+#define SMU7_VCE_DPM_CONFIG_MASK 0x20
+#define SMU7_ACP_DPM_CONFIG_MASK 0x40
+#define SMU7_SAMU_DPM_CONFIG_MASK 0x80
+#define SMU7_PCIEGEN_DPM_CONFIG_MASK 0x100
+
+#define SMU7_ACP_MCLK_HANDSHAKE_DISABLE 0x00000001
+#define SMU7_ACP_SCLK_HANDSHAKE_DISABLE 0x00000002
+#define SMU7_UVD_MCLK_HANDSHAKE_DISABLE 0x00000100
+#define SMU7_UVD_SCLK_HANDSHAKE_DISABLE 0x00000200
+#define SMU7_VCE_MCLK_HANDSHAKE_DISABLE 0x00010000
+#define SMU7_VCE_SCLK_HANDSHAKE_DISABLE 0x00020000
+
+struct SMU75_SoftRegisters {
+ uint32_t RefClockFrequency;
+ uint32_t PmTimerPeriod;
+ uint32_t FeatureEnables;
+#if defined (SMU__DGPU_ONLY)
+ uint32_t PreVBlankGap;
+ uint32_t VBlankTimeout;
+ uint32_t TrainTimeGap;
+ uint32_t MvddSwitchTime;
+ uint32_t LongestAcpiTrainTime;
+ uint32_t AcpiDelay;
+ uint32_t G5TrainTime;
+ uint32_t DelayMpllPwron;
+ uint32_t VoltageChangeTimeout;
+#endif
+ uint32_t HandshakeDisables;
+
+ uint8_t DisplayPhy1Config;
+ uint8_t DisplayPhy2Config;
+ uint8_t DisplayPhy3Config;
+ uint8_t DisplayPhy4Config;
+
+ uint8_t DisplayPhy5Config;
+ uint8_t DisplayPhy6Config;
+ uint8_t DisplayPhy7Config;
+ uint8_t DisplayPhy8Config;
+
+ uint32_t AverageGraphicsActivity;
+ uint32_t AverageMemoryActivity;
+ uint32_t AverageGioActivity;
+
+ uint8_t SClkDpmEnabledLevels;
+ uint8_t MClkDpmEnabledLevels;
+ uint8_t LClkDpmEnabledLevels;
+ uint8_t PCIeDpmEnabledLevels;
+
+ uint8_t UVDDpmEnabledLevels;
+ uint8_t SAMUDpmEnabledLevels;
+ uint8_t ACPDpmEnabledLevels;
+ uint8_t VCEDpmEnabledLevels;
+
+ uint32_t DRAM_LOG_ADDR_H;
+ uint32_t DRAM_LOG_ADDR_L;
+ uint32_t DRAM_LOG_PHY_ADDR_H;
+ uint32_t DRAM_LOG_PHY_ADDR_L;
+ uint32_t DRAM_LOG_BUFF_SIZE;
+ uint32_t UlvEnterCount;
+ uint32_t UlvTime;
+ uint32_t UcodeLoadStatus;
+ uint32_t AllowMvddSwitch;
+ uint8_t Activity_Weight;
+ uint8_t Reserved8[3];
+};
+
+typedef struct SMU75_SoftRegisters SMU75_SoftRegisters;
+
+struct SMU75_Firmware_Header {
+ uint32_t Digest[5];
+ uint32_t Version;
+ uint32_t HeaderSize;
+ uint32_t Flags;
+ uint32_t EntryPoint;
+ uint32_t CodeSize;
+ uint32_t ImageSize;
+
+ uint32_t Rtos;
+ uint32_t SoftRegisters;
+ uint32_t DpmTable;
+ uint32_t FanTable;
+ uint32_t CacConfigTable;
+ uint32_t CacStatusTable;
+ uint32_t mcRegisterTable;
+ uint32_t mcArbDramTimingTable;
+ uint32_t PmFuseTable;
+ uint32_t Globals;
+ uint32_t ClockStretcherTable;
+ uint32_t VftTable;
+ uint32_t Reserved1;
+ uint32_t AvfsCksOff_AvfsGbvTable;
+ uint32_t AvfsCksOff_BtcGbvTable;
+ uint32_t MM_AvfsTable;
+ uint32_t PowerSharingTable;
+ uint32_t AvfsTable;
+ uint32_t AvfsCksOffGbvTable;
+ uint32_t AvfsMeanNSigma;
+ uint32_t AvfsSclkOffsetTable;
+ uint32_t Reserved[12];
+ uint32_t Signature;
+};
+
+typedef struct SMU75_Firmware_Header SMU75_Firmware_Header;
+
+#define SMU7_FIRMWARE_HEADER_LOCATION 0x20000
+
+enum DisplayConfig {
+ PowerDown = 1,
+ DP54x4,
+ DP54x2,
+ DP54x1,
+ DP27x4,
+ DP27x2,
+ DP27x1,
+ HDMI297,
+ HDMI162,
+ LVDS,
+ DP324x4,
+ DP324x2,
+ DP324x1
+};
+
+#define MC_BLOCK_COUNT 1
+#define CPL_BLOCK_COUNT 5
+#define SE_BLOCK_COUNT 15
+#define GC_BLOCK_COUNT 24
+
+struct SMU7_Local_Cac {
+ uint8_t BlockId;
+ uint8_t SignalId;
+ uint8_t Threshold;
+ uint8_t Padding;
+};
+
+typedef struct SMU7_Local_Cac SMU7_Local_Cac;
+
+struct SMU7_Local_Cac_Table {
+ SMU7_Local_Cac CplLocalCac[CPL_BLOCK_COUNT];
+ SMU7_Local_Cac McLocalCac[MC_BLOCK_COUNT];
+ SMU7_Local_Cac SeLocalCac[SE_BLOCK_COUNT];
+ SMU7_Local_Cac GcLocalCac[GC_BLOCK_COUNT];
+};
+
+typedef struct SMU7_Local_Cac_Table SMU7_Local_Cac_Table;
+
+#pragma pack(pop)
+
+#define CG_SYS_BITMASK_FIRST_BIT 0
+#define CG_SYS_BITMASK_LAST_BIT 10
+#define CG_SYS_BIF_MGLS_SHIFT 0
+#define CG_SYS_ROM_SHIFT 1
+#define CG_SYS_MC_MGCG_SHIFT 2
+#define CG_SYS_MC_MGLS_SHIFT 3
+#define CG_SYS_SDMA_MGCG_SHIFT 4
+#define CG_SYS_SDMA_MGLS_SHIFT 5
+#define CG_SYS_DRM_MGCG_SHIFT 6
+#define CG_SYS_HDP_MGCG_SHIFT 7
+#define CG_SYS_HDP_MGLS_SHIFT 8
+#define CG_SYS_DRM_MGLS_SHIFT 9
+#define CG_SYS_BIF_MGCG_SHIFT 10
+
+#define CG_SYS_BIF_MGLS_MASK 0x1
+#define CG_SYS_ROM_MASK 0x2
+#define CG_SYS_MC_MGCG_MASK 0x4
+#define CG_SYS_MC_MGLS_MASK 0x8
+#define CG_SYS_SDMA_MGCG_MASK 0x10
+#define CG_SYS_SDMA_MGLS_MASK 0x20
+#define CG_SYS_DRM_MGCG_MASK 0x40
+#define CG_SYS_HDP_MGCG_MASK 0x80
+#define CG_SYS_HDP_MGLS_MASK 0x100
+#define CG_SYS_DRM_MGLS_MASK 0x200
+#define CG_SYS_BIF_MGCG_MASK 0x400
+
+#define CG_GFX_BITMASK_FIRST_BIT 16
+#define CG_GFX_BITMASK_LAST_BIT 24
+
+#define CG_GFX_CGCG_SHIFT 16
+#define CG_GFX_CGLS_SHIFT 17
+#define CG_CPF_MGCG_SHIFT 18
+#define CG_RLC_MGCG_SHIFT 19
+#define CG_GFX_OTHERS_MGCG_SHIFT 20
+#define CG_GFX_3DCG_SHIFT 21
+#define CG_GFX_3DLS_SHIFT 22
+#define CG_GFX_RLC_LS_SHIFT 23
+#define CG_GFX_CP_LS_SHIFT 24
+
+#define CG_GFX_CGCG_MASK 0x00010000
+#define CG_GFX_CGLS_MASK 0x00020000
+#define CG_CPF_MGCG_MASK 0x00040000
+#define CG_RLC_MGCG_MASK 0x00080000
+#define CG_GFX_OTHERS_MGCG_MASK 0x00100000
+#define CG_GFX_3DCG_MASK 0x00200000
+#define CG_GFX_3DLS_MASK 0x00400000
+#define CG_GFX_RLC_LS_MASK 0x00800000
+#define CG_GFX_CP_LS_MASK 0x01000000
+
+
+#define VRCONF_VDDC_MASK 0x000000FF
+#define VRCONF_VDDC_SHIFT 0
+#define VRCONF_VDDGFX_MASK 0x0000FF00
+#define VRCONF_VDDGFX_SHIFT 8
+#define VRCONF_VDDCI_MASK 0x00FF0000
+#define VRCONF_VDDCI_SHIFT 16
+#define VRCONF_MVDD_MASK 0xFF000000
+#define VRCONF_MVDD_SHIFT 24
+
+#define VR_MERGED_WITH_VDDC 0
+#define VR_SVI2_PLANE_1 1
+#define VR_SVI2_PLANE_2 2
+#define VR_SMIO_PATTERN_1 3
+#define VR_SMIO_PATTERN_2 4
+#define VR_STATIC_VOLTAGE 5
+
+#define CLOCK_STRETCHER_MAX_ENTRIES 0x4
+#define CKS_LOOKUPTable_MAX_ENTRIES 0x4
+
+#define CLOCK_STRETCHER_SETTING_DDT_MASK 0x01
+#define CLOCK_STRETCHER_SETTING_DDT_SHIFT 0x0
+#define CLOCK_STRETCHER_SETTING_STRETCH_AMOUNT_MASK 0x1E
+#define CLOCK_STRETCHER_SETTING_STRETCH_AMOUNT_SHIFT 0x1
+#define CLOCK_STRETCHER_SETTING_ENABLE_MASK 0x80
+#define CLOCK_STRETCHER_SETTING_ENABLE_SHIFT 0x7
+
+struct SMU_ClockStretcherDataTableEntry {
+ uint8_t minVID;
+ uint8_t maxVID;
+
+ uint16_t setting;
+};
+typedef struct SMU_ClockStretcherDataTableEntry SMU_ClockStretcherDataTableEntry;
+
+struct SMU_ClockStretcherDataTable {
+ SMU_ClockStretcherDataTableEntry ClockStretcherDataTableEntry[CLOCK_STRETCHER_MAX_ENTRIES];
+};
+typedef struct SMU_ClockStretcherDataTable SMU_ClockStretcherDataTable;
+
+struct SMU_CKS_LOOKUPTableEntry {
+ uint16_t minFreq;
+ uint16_t maxFreq;
+
+ uint8_t setting;
+ uint8_t padding[3];
+};
+typedef struct SMU_CKS_LOOKUPTableEntry SMU_CKS_LOOKUPTableEntry;
+
+struct SMU_CKS_LOOKUPTable {
+ SMU_CKS_LOOKUPTableEntry CKS_LOOKUPTableEntry[CKS_LOOKUPTable_MAX_ENTRIES];
+};
+typedef struct SMU_CKS_LOOKUPTable SMU_CKS_LOOKUPTable;
+
+struct AgmAvfsData_t {
+ uint16_t avgPsmCount[28];
+ uint16_t minPsmCount[28];
+};
+typedef struct AgmAvfsData_t AgmAvfsData_t;
+
+enum VFT_COLUMNS {
+ SCLK0,
+ SCLK1,
+ SCLK2,
+ SCLK3,
+ SCLK4,
+ SCLK5,
+ SCLK6,
+ SCLK7,
+
+ NUM_VFT_COLUMNS
+};
+enum {
+ SCS_FUSE_T0,
+ SCS_FUSE_T1,
+ NUM_SCS_FUSE_TEMPERATURE
+};
+enum {
+ SCKS_ON,
+ SCKS_OFF,
+ NUM_SCKS_STATE_TYPES
+};
+
+#define VFT_TABLE_DEFINED
+
+#define TEMP_RANGE_MAXSTEPS 12
+struct VFT_CELL_t {
+ uint16_t Voltage;
+};
+
+typedef struct VFT_CELL_t VFT_CELL_t;
+#ifdef SMU__FIRMWARE_SCKS_PRESENT__1
+struct SCS_CELL_t {
+ uint16_t PsmCnt[NUM_SCKS_STATE_TYPES];
+};
+typedef struct SCS_CELL_t SCS_CELL_t;
+#endif
+
+struct VFT_TABLE_t {
+ VFT_CELL_t Cell[TEMP_RANGE_MAXSTEPS][NUM_VFT_COLUMNS];
+ uint16_t AvfsGbv [NUM_VFT_COLUMNS];
+ uint16_t BtcGbv [NUM_VFT_COLUMNS];
+ int16_t Temperature [TEMP_RANGE_MAXSTEPS];
+
+#ifdef SMU__FIRMWARE_SCKS_PRESENT__1
+ SCS_CELL_t ScksCell[TEMP_RANGE_MAXSTEPS][NUM_VFT_COLUMNS];
+#endif
+
+ uint8_t NumTemperatureSteps;
+ uint8_t padding[3];
+};
+typedef struct VFT_TABLE_t VFT_TABLE_t;
+
+#define BTCGB_VDROOP_TABLE_MAX_ENTRIES 2
+#define AVFSGB_VDROOP_TABLE_MAX_ENTRIES 2
+
+struct GB_VDROOP_TABLE_t {
+ int32_t a0;
+ int32_t a1;
+ int32_t a2;
+ uint32_t spare;
+};
+typedef struct GB_VDROOP_TABLE_t GB_VDROOP_TABLE_t;
+
+struct SMU_QuadraticCoeffs {
+ int32_t m1;
+ int32_t b;
+
+ int16_t m2;
+ uint8_t m1_shift;
+ uint8_t m2_shift;
+};
+typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
+
+struct AVFS_Margin_t {
+ VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_Margin_t AVFS_Margin_t;
+
+struct AVFS_CksOff_Gbv_t {
+ VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_CksOff_Gbv_t AVFS_CksOff_Gbv_t;
+
+struct AVFS_CksOff_AvfsGbv_t {
+ VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_CksOff_AvfsGbv_t AVFS_CksOff_AvfsGbv_t;
+
+struct AVFS_CksOff_BtcGbv_t {
+ VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_CksOff_BtcGbv_t AVFS_CksOff_BtcGbv_t;
+
+struct AVFS_meanNsigma_t {
+ uint32_t Aconstant[3];
+ uint16_t DC_tol_sigma;
+ uint16_t Platform_mean;
+ uint16_t Platform_sigma;
+ uint16_t PSM_Age_CompFactor;
+ uint8_t Static_Voltage_Offset[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_meanNsigma_t AVFS_meanNsigma_t;
+
+struct AVFS_Sclk_Offset_t {
+ uint16_t Sclk_Offset[8];
+};
+typedef struct AVFS_Sclk_Offset_t AVFS_Sclk_Offset_t;
+
+struct Power_Sharing_t {
+ uint32_t EnergyCounter;
+ uint32_t EngeryThreshold;
+ uint64_t AM_SCLK_CNT;
+ uint64_t AM_0_BUSY_CNT;
+};
+typedef struct Power_Sharing_t Power_Sharing_t;
+
+
+#endif
+
+
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h
new file mode 100644
index 000000000000..b64e58a22ddf
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h
@@ -0,0 +1,886 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef SMU75_DISCRETE_H
+#define SMU75_DISCRETE_H
+
+#include "smu75.h"
+
+#pragma pack(push, 1)
+
+#define NUM_SCLK_RANGE 8
+
+#define VCO_3_6 1
+#define VCO_2_4 3
+
+#define POSTDIV_DIV_BY_1 0
+#define POSTDIV_DIV_BY_2 1
+#define POSTDIV_DIV_BY_4 2
+#define POSTDIV_DIV_BY_8 3
+#define POSTDIV_DIV_BY_16 4
+
+struct sclkFcwRange_t {
+ uint8_t vco_setting; /* 1: 3-6GHz, 3: 2-4GHz */
+ uint8_t postdiv; /* divide by 2^n */
+ uint16_t fcw_pcc;
+ uint16_t fcw_trans_upper;
+ uint16_t fcw_trans_lower;
+};
+typedef struct sclkFcwRange_t sclkFcwRange_t;
+
+struct SMIO_Pattern {
+ uint16_t Voltage;
+ uint8_t Smio;
+ uint8_t padding;
+};
+
+typedef struct SMIO_Pattern SMIO_Pattern;
+
+struct SMIO_Table {
+ SMIO_Pattern Pattern[SMU_MAX_SMIO_LEVELS];
+};
+
+typedef struct SMIO_Table SMIO_Table;
+
+struct SMU_SclkSetting {
+ uint32_t SclkFrequency;
+ uint16_t Fcw_int;
+ uint16_t Fcw_frac;
+ uint16_t Pcc_fcw_int;
+ uint8_t PllRange;
+ uint8_t SSc_En;
+ uint16_t Sclk_slew_rate;
+ uint16_t Pcc_up_slew_rate;
+ uint16_t Pcc_down_slew_rate;
+ uint16_t Fcw1_int;
+ uint16_t Fcw1_frac;
+ uint16_t Sclk_ss_slew_rate;
+};
+typedef struct SMU_SclkSetting SMU_SclkSetting;
+
+struct SMU75_Discrete_GraphicsLevel {
+ SMU_VoltageLevel MinVoltage;
+
+ uint8_t pcieDpmLevel;
+ uint8_t DeepSleepDivId;
+ uint16_t ActivityLevel;
+
+ uint32_t CgSpllFuncCntl3;
+ uint32_t CgSpllFuncCntl4;
+ uint32_t CcPwrDynRm;
+ uint32_t CcPwrDynRm1;
+
+ uint8_t SclkDid;
+ uint8_t padding;
+ uint8_t EnabledForActivity;
+ uint8_t EnabledForThrottle;
+ uint8_t UpHyst;
+ uint8_t DownHyst;
+ uint8_t VoltageDownHyst;
+ uint8_t PowerThrottle;
+
+ SMU_SclkSetting SclkSetting;
+
+ uint8_t ScksStretchThreshVid[NUM_SCKS_STATE_TYPES];
+ uint16_t Padding;
+};
+
+typedef struct SMU75_Discrete_GraphicsLevel SMU75_Discrete_GraphicsLevel;
+
+struct SMU75_Discrete_ACPILevel {
+ uint32_t Flags;
+ SMU_VoltageLevel MinVoltage;
+ uint32_t SclkFrequency;
+ uint8_t SclkDid;
+ uint8_t DisplayWatermark;
+ uint8_t DeepSleepDivId;
+ uint8_t padding;
+ uint32_t CcPwrDynRm;
+ uint32_t CcPwrDynRm1;
+
+ SMU_SclkSetting SclkSetting;
+};
+
+typedef struct SMU75_Discrete_ACPILevel SMU75_Discrete_ACPILevel;
+
+struct SMU75_Discrete_Ulv {
+ uint32_t CcPwrDynRm;
+ uint32_t CcPwrDynRm1;
+ uint16_t VddcOffset;
+ uint8_t VddcOffsetVid;
+ uint8_t VddcPhase;
+ uint16_t BifSclkDfs;
+ uint16_t Reserved;
+};
+
+typedef struct SMU75_Discrete_Ulv SMU75_Discrete_Ulv;
+
+struct SMU75_Discrete_MemoryLevel {
+ SMU_VoltageLevel MinVoltage;
+ uint32_t MinMvdd;
+
+ uint32_t MclkFrequency;
+
+ uint8_t StutterEnable;
+ uint8_t EnabledForThrottle;
+ uint8_t EnabledForActivity;
+ uint8_t padding_0;
+
+ uint8_t UpHyst;
+ uint8_t DownHyst;
+ uint8_t VoltageDownHyst;
+ uint8_t padding_1;
+
+ uint16_t ActivityLevel;
+ uint8_t DisplayWatermark;
+ uint8_t padding_2;
+
+ uint16_t Fcw_int;
+ uint16_t Fcw_frac;
+ uint8_t Postdiv;
+ uint8_t padding_3[3];
+};
+
+typedef struct SMU75_Discrete_MemoryLevel SMU75_Discrete_MemoryLevel;
+
+struct SMU75_Discrete_LinkLevel {
+ uint8_t PcieGenSpeed;
+ uint8_t PcieLaneCount;
+ uint8_t EnabledForActivity;
+ uint8_t SPC;
+ uint32_t DownThreshold;
+ uint32_t UpThreshold;
+ uint16_t BifSclkDfs;
+ uint16_t Reserved;
+};
+
+typedef struct SMU75_Discrete_LinkLevel SMU75_Discrete_LinkLevel;
+
+
+/* MC ARB DRAM Timing registers. */
+struct SMU75_Discrete_MCArbDramTimingTableEntry {
+ uint32_t McArbDramTiming;
+ uint32_t McArbDramTiming2;
+ uint32_t McArbBurstTime;
+ uint32_t McArbRfshRate;
+ uint32_t McArbMisc3;
+};
+
+typedef struct SMU75_Discrete_MCArbDramTimingTableEntry SMU75_Discrete_MCArbDramTimingTableEntry;
+
+struct SMU75_Discrete_MCArbDramTimingTable {
+ SMU75_Discrete_MCArbDramTimingTableEntry entries[SMU__NUM_SCLK_DPM_STATE][SMU__NUM_MCLK_DPM_LEVELS];
+};
+
+typedef struct SMU75_Discrete_MCArbDramTimingTable SMU75_Discrete_MCArbDramTimingTable;
+
+/* UVD VCLK/DCLK state (level) definition. */
+struct SMU75_Discrete_UvdLevel {
+ uint32_t VclkFrequency;
+ uint32_t DclkFrequency;
+ SMU_VoltageLevel MinVoltage;
+ uint8_t VclkDivider;
+ uint8_t DclkDivider;
+ uint8_t padding[2];
+};
+
+typedef struct SMU75_Discrete_UvdLevel SMU75_Discrete_UvdLevel;
+
+/* Clocks for other external blocks (VCE, ACP, SAMU). */
+struct SMU75_Discrete_ExtClkLevel {
+ uint32_t Frequency;
+ SMU_VoltageLevel MinVoltage;
+ uint8_t Divider;
+ uint8_t padding[3];
+};
+
+typedef struct SMU75_Discrete_ExtClkLevel SMU75_Discrete_ExtClkLevel;
+
+struct SMU75_Discrete_StateInfo {
+ uint32_t SclkFrequency;
+ uint32_t MclkFrequency;
+ uint32_t VclkFrequency;
+ uint32_t DclkFrequency;
+ uint32_t SamclkFrequency;
+ uint32_t AclkFrequency;
+ uint32_t EclkFrequency;
+ uint16_t MvddVoltage;
+ uint16_t padding16;
+ uint8_t DisplayWatermark;
+ uint8_t McArbIndex;
+ uint8_t McRegIndex;
+ uint8_t SeqIndex;
+ uint8_t SclkDid;
+ int8_t SclkIndex;
+ int8_t MclkIndex;
+ uint8_t PCIeGen;
+};
+
+typedef struct SMU75_Discrete_StateInfo SMU75_Discrete_StateInfo;
+
+struct SMU75_Discrete_DpmTable {
+ SMU75_PIDController GraphicsPIDController;
+ SMU75_PIDController MemoryPIDController;
+ SMU75_PIDController LinkPIDController;
+
+ uint32_t SystemFlags;
+
+ uint32_t VRConfig;
+ uint32_t SmioMask1;
+ uint32_t SmioMask2;
+ SMIO_Table SmioTable1;
+ SMIO_Table SmioTable2;
+
+ uint32_t MvddLevelCount;
+
+ uint8_t BapmVddcVidHiSidd [SMU75_MAX_LEVELS_VDDC];
+ uint8_t BapmVddcVidLoSidd [SMU75_MAX_LEVELS_VDDC];
+ uint8_t BapmVddcVidHiSidd2 [SMU75_MAX_LEVELS_VDDC];
+
+ uint8_t GraphicsDpmLevelCount;
+ uint8_t MemoryDpmLevelCount;
+ uint8_t LinkLevelCount;
+ uint8_t MasterDeepSleepControl;
+
+ uint8_t UvdLevelCount;
+ uint8_t VceLevelCount;
+ uint8_t AcpLevelCount;
+ uint8_t SamuLevelCount;
+
+ uint8_t ThermOutGpio;
+ uint8_t ThermOutPolarity;
+ uint8_t ThermOutMode;
+ uint8_t BootPhases;
+
+ uint8_t VRHotLevel;
+ uint8_t LdoRefSel;
+
+ uint8_t Reserved1[2];
+
+ uint16_t FanStartTemperature;
+ uint16_t FanStopTemperature;
+
+ uint16_t MaxVoltage;
+ uint16_t Reserved2;
+ uint32_t Reserved;
+
+ SMU75_Discrete_GraphicsLevel GraphicsLevel [SMU75_MAX_LEVELS_GRAPHICS];
+ SMU75_Discrete_MemoryLevel MemoryACPILevel;
+ SMU75_Discrete_MemoryLevel MemoryLevel [SMU75_MAX_LEVELS_MEMORY];
+ SMU75_Discrete_LinkLevel LinkLevel [SMU75_MAX_LEVELS_LINK];
+ SMU75_Discrete_ACPILevel ACPILevel;
+ SMU75_Discrete_UvdLevel UvdLevel [SMU75_MAX_LEVELS_UVD];
+ SMU75_Discrete_ExtClkLevel VceLevel [SMU75_MAX_LEVELS_VCE];
+ SMU75_Discrete_ExtClkLevel AcpLevel [SMU75_MAX_LEVELS_ACP];
+ SMU75_Discrete_ExtClkLevel SamuLevel [SMU75_MAX_LEVELS_SAMU];
+ SMU75_Discrete_Ulv Ulv;
+
+ uint8_t DisplayWatermark [SMU75_MAX_LEVELS_MEMORY][SMU75_MAX_LEVELS_GRAPHICS];
+
+ uint32_t SclkStepSize;
+ uint32_t Smio [SMU75_MAX_ENTRIES_SMIO];
+
+ uint8_t UvdBootLevel;
+ uint8_t VceBootLevel;
+ uint8_t AcpBootLevel;
+ uint8_t SamuBootLevel;
+
+ uint8_t GraphicsBootLevel;
+ uint8_t GraphicsVoltageChangeEnable;
+ uint8_t GraphicsThermThrottleEnable;
+ uint8_t GraphicsInterval;
+
+ uint8_t VoltageInterval;
+ uint8_t ThermalInterval;
+ uint16_t TemperatureLimitHigh;
+
+ uint16_t TemperatureLimitLow;
+ uint8_t MemoryBootLevel;
+ uint8_t MemoryVoltageChangeEnable;
+
+ uint16_t BootMVdd;
+ uint8_t MemoryInterval;
+ uint8_t MemoryThermThrottleEnable;
+
+ uint16_t VoltageResponseTime;
+ uint16_t PhaseResponseTime;
+
+ uint8_t PCIeBootLinkLevel;
+ uint8_t PCIeGenInterval;
+ uint8_t DTEInterval;
+ uint8_t DTEMode;
+
+ uint8_t SVI2Enable;
+ uint8_t VRHotGpio;
+ uint8_t AcDcGpio;
+ uint8_t ThermGpio;
+
+ uint16_t PPM_PkgPwrLimit;
+ uint16_t PPM_TemperatureLimit;
+
+ uint16_t DefaultTdp;
+ uint16_t TargetTdp;
+
+ uint16_t FpsHighThreshold;
+ uint16_t FpsLowThreshold;
+
+ uint16_t BAPMTI_R [SMU75_DTE_ITERATIONS][SMU75_DTE_SOURCES][SMU75_DTE_SINKS];
+ uint16_t BAPMTI_RC [SMU75_DTE_ITERATIONS][SMU75_DTE_SOURCES][SMU75_DTE_SINKS];
+
+ uint16_t TemperatureLimitEdge;
+ uint16_t TemperatureLimitHotspot;
+
+ uint16_t BootVddc;
+ uint16_t BootVddci;
+
+ uint16_t FanGainEdge;
+ uint16_t FanGainHotspot;
+
+ uint32_t LowSclkInterruptThreshold;
+ uint32_t VddGfxReChkWait;
+
+ uint8_t ClockStretcherAmount;
+ uint8_t Sclk_CKS_masterEn0_7;
+ uint8_t Sclk_CKS_masterEn8_15;
+ uint8_t DPMFreezeAndForced;
+
+ uint8_t Sclk_voltageOffset[8];
+
+ SMU_ClockStretcherDataTable ClockStretcherDataTable;
+ SMU_CKS_LOOKUPTable CKS_LOOKUPTable;
+
+ uint32_t CurrSclkPllRange;
+ sclkFcwRange_t SclkFcwRangeTable[NUM_SCLK_RANGE];
+
+ GB_VDROOP_TABLE_t BTCGB_VDROOP_TABLE[BTCGB_VDROOP_TABLE_MAX_ENTRIES];
+ SMU_QuadraticCoeffs AVFSGB_FUSE_TABLE[AVFSGB_VDROOP_TABLE_MAX_ENTRIES];
+};
+
+typedef struct SMU75_Discrete_DpmTable SMU75_Discrete_DpmTable;
+
+struct SMU75_Discrete_FanTable {
+ uint16_t FdoMode;
+ int16_t TempMin;
+ int16_t TempMed;
+ int16_t TempMax;
+ int16_t Slope1;
+ int16_t Slope2;
+ int16_t FdoMin;
+ int16_t HystUp;
+ int16_t HystDown;
+ int16_t HystSlope;
+ int16_t TempRespLim;
+ int16_t TempCurr;
+ int16_t SlopeCurr;
+ int16_t PwmCurr;
+ uint32_t RefreshPeriod;
+ int16_t FdoMax;
+ uint8_t TempSrc;
+ int8_t Padding;
+};
+
+typedef struct SMU75_Discrete_FanTable SMU75_Discrete_FanTable;
+
+#define SMU7_DISCRETE_GPIO_SCLK_DEBUG 4
+#define SMU7_DISCRETE_GPIO_SCLK_DEBUG_BIT (0x1 << SMU7_DISCRETE_GPIO_SCLK_DEBUG)
+
+
+
+struct SMU7_MclkDpmScoreboard {
+ uint32_t PercentageBusy;
+
+ int32_t PIDError;
+ int32_t PIDIntegral;
+ int32_t PIDOutput;
+
+ uint32_t SigmaDeltaAccum;
+ uint32_t SigmaDeltaOutput;
+ uint32_t SigmaDeltaLevel;
+
+ uint32_t UtilizationSetpoint;
+
+ uint8_t TdpClampMode;
+ uint8_t TdcClampMode;
+ uint8_t ThermClampMode;
+ uint8_t VoltageBusy;
+
+ int8_t CurrLevel;
+ int8_t TargLevel;
+ uint8_t LevelChangeInProgress;
+ uint8_t UpHyst;
+
+ uint8_t DownHyst;
+ uint8_t VoltageDownHyst;
+ uint8_t DpmEnable;
+ uint8_t DpmRunning;
+
+ uint8_t DpmForce;
+ uint8_t DpmForceLevel;
+ uint8_t padding2;
+ uint8_t McArbIndex;
+
+ uint32_t MinimumPerfMclk;
+
+ uint8_t AcpiReq;
+ uint8_t AcpiAck;
+ uint8_t MclkSwitchInProgress;
+ uint8_t MclkSwitchCritical;
+
+ uint8_t IgnoreVBlank;
+ uint8_t TargetMclkIndex;
+ uint8_t TargetMvddIndex;
+ uint8_t MclkSwitchResult;
+
+ uint16_t VbiFailureCount;
+ uint8_t VbiWaitCounter;
+ uint8_t EnabledLevelsChange;
+
+ uint16_t LevelResidencyCounters [SMU75_MAX_LEVELS_MEMORY];
+ uint16_t LevelSwitchCounters [SMU75_MAX_LEVELS_MEMORY];
+
+ void (*TargetStateCalculator)(uint8_t);
+ void (*SavedTargetStateCalculator)(uint8_t);
+
+ uint16_t AutoDpmInterval;
+ uint16_t AutoDpmRange;
+
+ uint16_t VbiTimeoutCount;
+ uint16_t MclkSwitchingTime;
+
+ uint8_t fastSwitch;
+ uint8_t Save_PIC_VDDGFX_EXIT;
+ uint8_t Save_PIC_VDDGFX_ENTER;
+ uint8_t VbiTimeout;
+
+ uint32_t HbmTempRegBackup;
+};
+
+typedef struct SMU7_MclkDpmScoreboard SMU7_MclkDpmScoreboard;
+
+struct SMU7_UlvScoreboard {
+ uint8_t EnterUlv;
+ uint8_t ExitUlv;
+ uint8_t UlvActive;
+ uint8_t WaitingForUlv;
+ uint8_t UlvEnable;
+ uint8_t UlvRunning;
+ uint8_t UlvMasterEnable;
+ uint8_t padding;
+ uint32_t UlvAbortedCount;
+ uint32_t UlvTimeStamp;
+};
+
+typedef struct SMU7_UlvScoreboard SMU7_UlvScoreboard;
+
+struct VddgfxSavedRegisters {
+ uint32_t GPU_DBG[3];
+ uint32_t MEC_BaseAddress_Hi;
+ uint32_t MEC_BaseAddress_Lo;
+ uint32_t THM_TMON0_CTRL2__RDIR_PRESENT;
+ uint32_t THM_TMON1_CTRL2__RDIR_PRESENT;
+ uint32_t CP_INT_CNTL;
+};
+
+typedef struct VddgfxSavedRegisters VddgfxSavedRegisters;
+
+struct SMU7_VddGfxScoreboard {
+ uint8_t VddGfxEnable;
+ uint8_t VddGfxActive;
+ uint8_t VPUResetOccured;
+ uint8_t padding;
+
+ uint32_t VddGfxEnteredCount;
+ uint32_t VddGfxAbortedCount;
+
+ uint32_t VddGfxVid;
+
+ VddgfxSavedRegisters SavedRegisters;
+};
+
+typedef struct SMU7_VddGfxScoreboard SMU7_VddGfxScoreboard;
+
+struct SMU7_TdcLimitScoreboard {
+ uint8_t Enable;
+ uint8_t Running;
+ uint16_t Alpha;
+ uint32_t FilteredIddc;
+ uint32_t IddcLimit;
+ uint32_t IddcHyst;
+ SMU7_HystController_Data HystControllerData;
+};
+
+typedef struct SMU7_TdcLimitScoreboard SMU7_TdcLimitScoreboard;
+
+struct SMU7_PkgPwrLimitScoreboard {
+ uint8_t Enable;
+ uint8_t Running;
+ uint16_t Alpha;
+ uint32_t FilteredPkgPwr;
+ uint32_t Limit;
+ uint32_t Hyst;
+ uint32_t LimitFromDriver;
+ uint8_t PowerSharingEnabled;
+ uint8_t PowerSharingCounter;
+ uint8_t PowerSharingINTEnabled;
+ uint8_t GFXActivityCounterEnabled;
+ uint32_t EnergyCount;
+ uint32_t PSACTCount;
+ uint8_t RollOverRequired;
+ uint8_t RollOverCount;
+ uint8_t padding[2];
+ SMU7_HystController_Data HystControllerData;
+};
+
+typedef struct SMU7_PkgPwrLimitScoreboard SMU7_PkgPwrLimitScoreboard;
+
+struct SMU7_BapmScoreboard {
+ uint32_t source_powers[SMU75_DTE_SOURCES];
+ uint32_t source_powers_last[SMU75_DTE_SOURCES];
+ int32_t entity_temperatures[SMU75_NUM_GPU_TES];
+ int32_t initial_entity_temperatures[SMU75_NUM_GPU_TES];
+ int32_t Limit;
+ int32_t Hyst;
+ int32_t therm_influence_coeff_table[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS * 2];
+ int32_t therm_node_table[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS];
+ uint16_t ConfigTDPPowerScalar;
+ uint16_t FanSpeedPowerScalar;
+ uint16_t OverDrivePowerScalar;
+ uint16_t OverDriveLimitScalar;
+ uint16_t FinalPowerScalar;
+ uint8_t VariantID;
+ uint8_t spare997;
+
+ SMU7_HystController_Data HystControllerData;
+
+ int32_t temperature_gradient_slope;
+ int32_t temperature_gradient;
+ uint32_t measured_temperature;
+};
+
+
+typedef struct SMU7_BapmScoreboard SMU7_BapmScoreboard;
+
+struct SMU7_AcpiScoreboard {
+ uint32_t SavedInterruptMask[2];
+ uint8_t LastACPIRequest;
+ uint8_t CgBifResp;
+ uint8_t RequestType;
+ uint8_t Padding;
+ SMU75_Discrete_ACPILevel D0Level;
+};
+
+typedef struct SMU7_AcpiScoreboard SMU7_AcpiScoreboard;
+
+struct SMU75_Discrete_PmFuses {
+ uint8_t BapmVddCVidHiSidd[8];
+
+ uint8_t BapmVddCVidLoSidd[8];
+
+ uint8_t VddCVid[8];
+
+ uint8_t SviLoadLineEn;
+ uint8_t SviLoadLineVddC;
+ uint8_t SviLoadLineTrimVddC;
+ uint8_t SviLoadLineOffsetVddC;
+
+ uint16_t TDC_VDDC_PkgLimit;
+ uint8_t TDC_VDDC_ThrottleReleaseLimitPerc;
+ uint8_t TDC_MAWt;
+
+ uint8_t TdcWaterfallCtl;
+ uint8_t LPMLTemperatureMin;
+ uint8_t LPMLTemperatureMax;
+ uint8_t Reserved;
+
+ uint8_t LPMLTemperatureScaler[16];
+
+ int16_t FuzzyFan_ErrorSetDelta;
+ int16_t FuzzyFan_ErrorRateSetDelta;
+ int16_t FuzzyFan_PwmSetDelta;
+ uint16_t Reserved6;
+
+ uint8_t GnbLPML[16];
+
+ uint8_t GnbLPMLMaxVid;
+ uint8_t GnbLPMLMinVid;
+ uint8_t Reserved1[2];
+
+ uint16_t BapmVddCBaseLeakageHiSidd;
+ uint16_t BapmVddCBaseLeakageLoSidd;
+
+ uint16_t VFT_Temp[3];
+ uint8_t Version;
+ uint8_t padding;
+
+ SMU_QuadraticCoeffs VFT_ATE[3];
+
+ SMU_QuadraticCoeffs AVFS_GB;
+ SMU_QuadraticCoeffs ATE_ACBTC_GB;
+
+ SMU_QuadraticCoeffs P2V;
+
+ uint32_t PsmCharzFreq;
+
+ uint16_t InversionVoltage;
+ uint16_t PsmCharzTemp;
+
+ uint32_t EnabledAvfsModules;
+
+ SMU_QuadraticCoeffs BtcGbv_CksOff;
+};
+
+typedef struct SMU75_Discrete_PmFuses SMU75_Discrete_PmFuses;
+
+struct SMU7_Discrete_Log_Header_Table {
+ uint32_t version;
+ uint32_t asic_id;
+ uint16_t flags;
+ uint16_t entry_size;
+ uint32_t total_size;
+ uint32_t num_of_entries;
+ uint8_t type;
+ uint8_t mode;
+ uint8_t filler_0[2];
+ uint32_t filler_1[2];
+};
+
+typedef struct SMU7_Discrete_Log_Header_Table SMU7_Discrete_Log_Header_Table;
+
+struct SMU7_Discrete_Log_Cntl {
+ uint8_t Enabled;
+ uint8_t Type;
+ uint8_t padding[2];
+ uint32_t BufferSize;
+ uint32_t SamplesLogged;
+ uint32_t SampleSize;
+ uint32_t AddrL;
+ uint32_t AddrH;
+};
+
+typedef struct SMU7_Discrete_Log_Cntl SMU7_Discrete_Log_Cntl;
+
+#if defined SMU__DGPU_ONLY
+#define CAC_ACC_NW_NUM_OF_SIGNALS 87
+#endif
+
+
+struct SMU7_Discrete_Cac_Collection_Table {
+ uint32_t temperature;
+ uint32_t cac_acc_nw[CAC_ACC_NW_NUM_OF_SIGNALS];
+};
+
+typedef struct SMU7_Discrete_Cac_Collection_Table SMU7_Discrete_Cac_Collection_Table;
+
+struct SMU7_Discrete_Cac_Verification_Table {
+ uint32_t VddcTotalPower;
+ uint32_t VddcLeakagePower;
+ uint32_t VddcConstantPower;
+ uint32_t VddcGfxDynamicPower;
+ uint32_t VddcUvdDynamicPower;
+ uint32_t VddcVceDynamicPower;
+ uint32_t VddcAcpDynamicPower;
+ uint32_t VddcPcieDynamicPower;
+ uint32_t VddcDceDynamicPower;
+ uint32_t VddcCurrent;
+ uint32_t VddcVoltage;
+ uint32_t VddciTotalPower;
+ uint32_t VddciLeakagePower;
+ uint32_t VddciConstantPower;
+ uint32_t VddciDynamicPower;
+ uint32_t Vddr1TotalPower;
+ uint32_t Vddr1LeakagePower;
+ uint32_t Vddr1ConstantPower;
+ uint32_t Vddr1DynamicPower;
+ uint32_t spare[4];
+ uint32_t temperature;
+};
+
+typedef struct SMU7_Discrete_Cac_Verification_Table SMU7_Discrete_Cac_Verification_Table;
+
+struct SMU7_Discrete_Pm_Status_Table {
+ int32_t T_meas_max[SMU75_THERMAL_INPUT_LOOP_COUNT];
+ int32_t T_meas_acc[SMU75_THERMAL_INPUT_LOOP_COUNT];
+
+ uint32_t I_calc_max;
+ uint32_t I_calc_acc;
+ uint32_t P_meas_acc;
+ uint32_t V_meas_load_acc;
+ uint32_t I_meas_acc;
+ uint32_t P_meas_acc_vddci;
+ uint32_t V_meas_load_acc_vddci;
+ uint32_t I_meas_acc_vddci;
+
+ uint16_t Sclk_dpm_residency[8];
+ uint16_t Uvd_dpm_residency[8];
+ uint16_t Vce_dpm_residency[8];
+ uint16_t Mclk_dpm_residency[4];
+
+ uint32_t P_roc_acc;
+ uint32_t PkgPwr_max;
+ uint32_t PkgPwr_acc;
+ uint32_t MclkSwitchingTime_max;
+ uint32_t MclkSwitchingTime_acc;
+ uint32_t FanPwm_acc;
+ uint32_t FanRpm_acc;
+ uint32_t Gfx_busy_acc;
+ uint32_t Mc_busy_acc;
+ uint32_t Fps_acc;
+
+ uint32_t AccCnt;
+};
+
+typedef struct SMU7_Discrete_Pm_Status_Table SMU7_Discrete_Pm_Status_Table;
+
+struct SMU7_Discrete_AutoWattMan_Status_Table {
+ int32_t T_meas_acc[SMU75_THERMAL_INPUT_LOOP_COUNT];
+ uint16_t Sclk_dpm_residency[8];
+ uint16_t Mclk_dpm_residency[4];
+ uint32_t TgpPwr_acc;
+ uint32_t Gfx_busy_acc;
+ uint32_t Mc_busy_acc;
+ uint32_t AccCnt;
+};
+
+typedef struct SMU7_Discrete_AutoWattMan_Status_Table SMU7_Discrete_AutoWattMan_Status_Table;
+
+#define SMU7_MAX_GFX_CU_COUNT 24
+#define SMU7_MIN_GFX_CU_COUNT 8
+#define SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_SHIFT 0
+#define SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_MASK (0xFFFF << SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_SHIFT)
+#define SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_SHIFT 16
+#define SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_MASK (0xFFFF << SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_SHIFT)
+
+struct SMU7_GfxCuPgScoreboard {
+ uint8_t Enabled;
+ uint8_t WaterfallUp;
+ uint8_t WaterfallDown;
+ uint8_t WaterfallLimit;
+ uint8_t CurrMaxCu;
+ uint8_t TargMaxCu;
+ uint8_t ClampMode;
+ uint8_t Active;
+ uint8_t MaxSupportedCu;
+ uint8_t MinSupportedCu;
+ uint8_t PendingGfxCuHostInterrupt;
+ uint8_t LastFilteredMaxCuInteger;
+ uint16_t FilteredMaxCu;
+ uint16_t FilteredMaxCuAlpha;
+ uint16_t FilterResetCount;
+ uint16_t FilterResetCountLimit;
+ uint8_t ForceCu;
+ uint8_t ForceCuCount;
+ uint8_t AcModeMaxCu;
+ uint8_t DcModeMaxCu;
+};
+
+typedef struct SMU7_GfxCuPgScoreboard SMU7_GfxCuPgScoreboard;
+
+#define SMU7_SCLK_CAC 0x561
+#define SMU7_MCLK_CAC 0xF9
+#define SMU7_VCLK_CAC 0x2DE
+#define SMU7_DCLK_CAC 0x2DE
+#define SMU7_ECLK_CAC 0x25E
+#define SMU7_ACLK_CAC 0x25E
+#define SMU7_SAMCLK_CAC 0x25E
+#define SMU7_DISPCLK_CAC 0x100
+#define SMU7_CAC_CONSTANT 0x2EE3430
+#define SMU7_CAC_CONSTANT_SHIFT 18
+
+#define SMU7_VDDCI_MCLK_CONST 1765
+#define SMU7_VDDCI_MCLK_CONST_SHIFT 16
+#define SMU7_VDDCI_VDDCI_CONST 50958
+#define SMU7_VDDCI_VDDCI_CONST_SHIFT 14
+#define SMU7_VDDCI_CONST 11781
+#define SMU7_VDDCI_STROBE_PWR 1331
+
+#define SMU7_VDDR1_CONST 693
+#define SMU7_VDDR1_CAC_WEIGHT 20
+#define SMU7_VDDR1_CAC_WEIGHT_SHIFT 19
+#define SMU7_VDDR1_STROBE_PWR 512
+
+#define SMU7_AREA_COEFF_UVD 0xA78
+#define SMU7_AREA_COEFF_VCE 0x190A
+#define SMU7_AREA_COEFF_ACP 0x22D1
+#define SMU7_AREA_COEFF_SAMU 0x534
+
+#define SMU7_THERM_OUT_MODE_DISABLE 0x0
+#define SMU7_THERM_OUT_MODE_THERM_ONLY 0x1
+#define SMU7_THERM_OUT_MODE_THERM_VRHOT 0x2
+
+#define SQ_Enable_MASK 0x1
+#define SQ_IR_MASK 0x2
+#define SQ_PCC_MASK 0x4
+#define SQ_EDC_MASK 0x8
+
+#define TCP_Enable_MASK 0x100
+#define TCP_IR_MASK 0x200
+#define TCP_PCC_MASK 0x400
+#define TCP_EDC_MASK 0x800
+
+#define TD_Enable_MASK 0x10000
+#define TD_IR_MASK 0x20000
+#define TD_PCC_MASK 0x40000
+#define TD_EDC_MASK 0x80000
+
+#define DB_Enable_MASK 0x1000000
+#define DB_IR_MASK 0x2000000
+#define DB_PCC_MASK 0x4000000
+#define DB_EDC_MASK 0x8000000
+
+#define SQ_Enable_SHIFT 0
+#define SQ_IR_SHIFT 1
+#define SQ_PCC_SHIFT 2
+#define SQ_EDC_SHIFT 3
+
+#define TCP_Enable_SHIFT 8
+#define TCP_IR_SHIFT 9
+#define TCP_PCC_SHIFT 10
+#define TCP_EDC_SHIFT 11
+
+#define TD_Enable_SHIFT 16
+#define TD_IR_SHIFT 17
+#define TD_PCC_SHIFT 18
+#define TD_EDC_SHIFT 19
+
+#define DB_Enable_SHIFT 24
+#define DB_IR_SHIFT 25
+#define DB_PCC_SHIFT 26
+#define DB_EDC_SHIFT 27
+
+#define PMFUSES_AVFSSIZE 104
+
+#define BTCGB0_Vdroop_Enable_MASK 0x1
+#define BTCGB1_Vdroop_Enable_MASK 0x2
+#define AVFSGB0_Vdroop_Enable_MASK 0x4
+#define AVFSGB1_Vdroop_Enable_MASK 0x8
+
+#define BTCGB0_Vdroop_Enable_SHIFT 0
+#define BTCGB1_Vdroop_Enable_SHIFT 1
+#define AVFSGB0_Vdroop_Enable_SHIFT 2
+#define AVFSGB1_Vdroop_Enable_SHIFT 3
+
+#pragma pack(pop)
+
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
index c3ed737ab951..715b5a168831 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
@@ -131,6 +131,7 @@ typedef uint16_t PPSMC_Result;
#define PPSMC_MSG_RunAcgInOpenLoop 0x5E
#define PPSMC_MSG_InitializeAcg 0x5F
#define PPSMC_MSG_GetCurrPkgPwr 0x61
+#define PPSMC_MSG_GetAverageGfxclkActualFrequency 0x63
#define PPSMC_MSG_SetPccThrottleLevel 0x67
#define PPSMC_MSG_UpdatePkgPwrPidAlpha 0x68
#define PPSMC_Message_Count 0x69
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
index 958755075421..0a200406a1ec 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
@@ -26,7 +26,7 @@
SMU_MGR = smumgr.o smu8_smumgr.o tonga_smumgr.o fiji_smumgr.o \
polaris10_smumgr.o iceland_smumgr.o \
smu7_smumgr.o vega10_smumgr.o smu10_smumgr.o ci_smumgr.o \
- vega12_smumgr.o
+ vega12_smumgr.o vegam_smumgr.o
AMD_PP_SMUMGR = $(addprefix $(AMD_PP_PATH)/smumgr/,$(SMU_MGR))
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index 08d000140eca..2d4ec8ac3a08 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -61,9 +61,6 @@
#define SMC_RAM_END 0x40000
-#define VOLTAGE_SCALE 4
-#define VOLTAGE_VID_OFFSET_SCALE1 625
-#define VOLTAGE_VID_OFFSET_SCALE2 100
#define CISLAND_MINIMUM_ENGINE_CLOCK 800
#define CISLAND_MAX_DEEPSLEEP_DIVIDER_ID 5
@@ -211,9 +208,7 @@ static int ci_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
{
int ret;
- if (!ci_is_smc_ram_running(hwmgr))
- return -EINVAL;
-
+ cgs_write_register(hwmgr->device, mmSMC_RESP_0, 0);
cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, msg);
PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
@@ -1182,7 +1177,6 @@ static int ci_populate_single_memory_level(
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
int result = 0;
bool dll_state_on;
- struct cgs_display_info info = {0};
uint32_t mclk_edc_wr_enable_threshold = 40000;
uint32_t mclk_edc_enable_threshold = 40000;
uint32_t mclk_strobe_mode_threshold = 40000;
@@ -1236,8 +1230,7 @@ static int ci_populate_single_memory_level(
/* default set to low watermark. Highest level will be set to high later.*/
memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
- cgs_get_active_displays_info(hwmgr->device, &info);
- data->display_timing.num_existing_displays = info.display_count;
+ data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
/* stutter mode not support on ci */
@@ -2784,7 +2777,6 @@ static int ci_smu_fini(struct pp_hwmgr *hwmgr)
{
kfree(hwmgr->smu_backend);
hwmgr->smu_backend = NULL;
- cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU);
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index faef78321446..53df9405f43a 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -53,10 +53,7 @@
#define FIJI_SMC_SIZE 0x20000
-#define VOLTAGE_SCALE 4
#define POWERTUNE_DEFAULT_SET_MAX 1
-#define VOLTAGE_VID_OFFSET_SCALE1 625
-#define VOLTAGE_VID_OFFSET_SCALE2 100
#define VDDC_VDDCI_DELTA 300
#define MC_CG_ARB_FREQ_F1 0x0b
@@ -288,8 +285,7 @@ static int fiji_start_smu(struct pp_hwmgr *hwmgr)
struct fiji_smumgr *priv = (struct fiji_smumgr *)(hwmgr->smu_backend);
/* Only start SMC if SMC RAM is not running */
- if (!(smu7_is_smc_ram_running(hwmgr)
- || cgs_is_virtualization_enabled(hwmgr->device))) {
+ if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) {
/* Check if SMU is running in protected mode */
if (0 == PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device,
CGS_IND_REG__SMC,
@@ -307,13 +303,13 @@ static int fiji_start_smu(struct pp_hwmgr *hwmgr)
}
/* To initialize all clock gating before RLC loaded and running.*/
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_GFX, AMD_CG_STATE_GATE);
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_GMC, AMD_CG_STATE_GATE);
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_SDMA, AMD_CG_STATE_GATE);
- cgs_set_clockgating_state(hwmgr->device,
+ amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
AMD_IP_BLOCK_TYPE_COMMON, AMD_CG_STATE_GATE);
/* Setup SoftRegsStart here for register lookup in case
@@ -335,10 +331,10 @@ static bool fiji_is_hw_avfs_present(struct pp_hwmgr *hwmgr)
uint32_t efuse = 0;
uint32_t mask = (1 << ((AVFS_EN_MSB - AVFS_EN_LSB) + 1)) - 1;
- if (cgs_is_virtualization_enabled(hwmgr->device))
- return 0;
+ if (!hwmgr->not_vf)
+ return false;
- if (!atomctrl_read_efuse(hwmgr->device, AVFS_EN_LSB, AVFS_EN_MSB,
+ if (!atomctrl_read_efuse(hwmgr, AVFS_EN_LSB, AVFS_EN_MSB,
mask, &efuse)) {
if (efuse)
return true;
@@ -989,11 +985,11 @@ static int fiji_populate_single_graphic_level(struct pp_hwmgr *hwmgr,
threshold = clock * data->fast_watermark_threshold / 100;
- data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr;
+ data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr;
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep))
level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock,
- hwmgr->display_config.min_core_set_clock_in_sr);
+ hwmgr->display_config->min_core_set_clock_in_sr);
/* Default to slow, highest DPM level will be
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
index d4bb934e7334..415f691c3fa9 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
@@ -60,10 +60,7 @@
#define ICELAND_SMC_SIZE 0x20000
-#define VOLTAGE_SCALE 4
#define POWERTUNE_DEFAULT_SET_MAX 1
-#define VOLTAGE_VID_OFFSET_SCALE1 625
-#define VOLTAGE_VID_OFFSET_SCALE2 100
#define MC_CG_ARB_FREQ_F1 0x0b
#define VDDC_VDDCI_DELTA 200
@@ -932,7 +929,7 @@ static int iceland_populate_single_graphic_level(struct pp_hwmgr *hwmgr,
graphic_level->PowerThrottle = 0;
data->display_timing.min_clock_in_sr =
- hwmgr->display_config.min_core_set_clock_in_sr;
+ hwmgr->display_config->min_core_set_clock_in_sr;
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_SclkDeepSleep))
@@ -1236,7 +1233,6 @@ static int iceland_populate_single_memory_level(
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
int result = 0;
bool dll_state_on;
- struct cgs_display_info info = {0};
uint32_t mclk_edc_wr_enable_threshold = 40000;
uint32_t mclk_edc_enable_threshold = 40000;
uint32_t mclk_strobe_mode_threshold = 40000;
@@ -1283,8 +1279,7 @@ static int iceland_populate_single_memory_level(
/* default set to low watermark. Highest level will be set to high later.*/
memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
- cgs_get_active_displays_info(hwmgr->device, &info);
- data->display_timing.num_existing_displays = info.display_count;
+ data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
/* stutter mode not support on iceland */
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 997a777dd35b..a8c6524f07e4 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -52,8 +52,6 @@
#include "dce/dce_10_0_sh_mask.h"
#define POLARIS10_SMC_SIZE 0x20000
-#define VOLTAGE_VID_OFFSET_SCALE1 625
-#define VOLTAGE_VID_OFFSET_SCALE2 100
#define POWERTUNE_DEFAULT_SET_MAX 1
#define VDDC_VDDCI_DELTA 200
#define MC_CG_ARB_FREQ_F1 0x0b
@@ -295,25 +293,16 @@ static int polaris10_start_smu(struct pp_hwmgr *hwmgr)
struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend);
/* Only start SMC if SMC RAM is not running */
- if (!(smu7_is_smc_ram_running(hwmgr)
- || cgs_is_virtualization_enabled(hwmgr->device))) {
+ if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) {
smu_data->protected_mode = (uint8_t) (PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_MODE));
smu_data->smu7_data.security_hard_key = (uint8_t) (PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_SEL));
/* Check if SMU is running in protected mode */
- if (smu_data->protected_mode == 0) {
+ if (smu_data->protected_mode == 0)
result = polaris10_start_smu_in_non_protection_mode(hwmgr);
- } else {
+ else
result = polaris10_start_smu_in_protection_mode(hwmgr);
- /* If failed, try with different security Key. */
- if (result != 0) {
- smu_data->smu7_data.security_hard_key ^= 1;
- cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU);
- result = polaris10_start_smu_in_protection_mode(hwmgr);
- }
- }
-
if (result != 0)
PP_ASSERT_WITH_CODE(0, "Failed to load SMU ucode.", return result);
@@ -951,11 +940,11 @@ static int polaris10_populate_single_graphic_level(struct pp_hwmgr *hwmgr,
level->DownHyst = data->current_profile_setting.sclk_down_hyst;
level->VoltageDownHyst = 0;
level->PowerThrottle = 0;
- data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr;
+ data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr;
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep))
level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock,
- hwmgr->display_config.min_core_set_clock_in_sr);
+ hwmgr->display_config->min_core_set_clock_in_sr);
/* Default to slow, highest DPM level will be
* set to PPSMC_DISPLAY_WATERMARK_LOW later.
@@ -1085,11 +1074,9 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
struct phm_ppt_v1_information *table_info =
(struct phm_ppt_v1_information *)(hwmgr->pptable);
int result = 0;
- struct cgs_display_info info = {0, 0, NULL};
uint32_t mclk_stutter_mode_threshold = 40000;
phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL;
- cgs_get_active_displays_info(hwmgr->device, &info);
if (hwmgr->od_enabled)
vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_mclk;
@@ -1115,7 +1102,7 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
mem_level->StutterEnable = false;
mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
- data->display_timing.num_existing_displays = info.display_count;
+ data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
if (mclk_stutter_mode_threshold &&
(clock <= mclk_stutter_mode_threshold) &&
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
index bc53f2beda30..0a563f6fe9ea 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
@@ -23,7 +23,7 @@
#include "smumgr.h"
#include "smu10_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
#include "smu10_smumgr.h"
#include "ppatomctrl.h"
#include "rv_ppsmc.h"
@@ -33,8 +33,6 @@
#include "pp_debug.h"
-#define VOLTAGE_SCALE 4
-
#define BUFFER_SIZE 80000
#define MAX_STRING_SIZE 15
#define BUFFER_SIZETWO 131072
@@ -49,48 +47,41 @@
static uint32_t smu10_wait_for_response(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
uint32_t reg;
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
+ reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
phm_wait_for_register_unequal(hwmgr, reg,
0, MP1_C2PMSG_90__CONTENT_MASK);
- return cgs_read_register(hwmgr->device, reg);
+ return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
}
static int smu10_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr,
uint16_t msg)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66);
- cgs_write_register(hwmgr->device, reg, msg);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
return 0;
}
static int smu10_read_arg_from_smc(struct pp_hwmgr *hwmgr)
{
- uint32_t reg;
-
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
+ struct amdgpu_device *adev = hwmgr->adev;
- return cgs_read_register(hwmgr->device, reg);
+ return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
}
static int smu10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
smu10_wait_for_response(hwmgr);
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
- cgs_write_register(hwmgr->device, reg, 0);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
smu10_send_msg_to_smc_without_waiting(hwmgr, msg);
@@ -104,17 +95,13 @@ static int smu10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
static int smu10_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
uint16_t msg, uint32_t parameter)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
smu10_wait_for_response(hwmgr);
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
- cgs_write_register(hwmgr->device, reg, 0);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
- cgs_write_register(hwmgr->device, reg, parameter);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter);
smu10_send_msg_to_smc_without_waiting(hwmgr, msg);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
index 0399c10d2be0..d644a9bb9078 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
@@ -167,24 +167,25 @@ int smu7_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
{
int ret;
- if (!smu7_is_smc_ram_running(hwmgr))
- return -EINVAL;
-
-
PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
ret = PHM_READ_FIELD(hwmgr->device, SMC_RESP_0, SMC_RESP);
- if (ret != 1)
- pr_info("\n failed to send pre message %x ret is %d \n", msg, ret);
+ if (ret == 0xFE)
+ pr_debug("last message was not supported\n");
+ else if (ret != 1)
+ pr_info("\n last message was failed ret is %d\n", ret);
+ cgs_write_register(hwmgr->device, mmSMC_RESP_0, 0);
cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, msg);
PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
ret = PHM_READ_FIELD(hwmgr->device, SMC_RESP_0, SMC_RESP);
- if (ret != 1)
+ if (ret == 0xFE)
+ pr_debug("message %x was not supported\n", msg);
+ else if (ret != 1)
pr_info("\n failed to send message %x ret is %d \n", msg, ret);
return 0;
@@ -199,10 +200,6 @@ int smu7_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg)
int smu7_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter)
{
- if (!smu7_is_smc_ram_running(hwmgr)) {
- return -EINVAL;
- }
-
PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
cgs_write_register(hwmgr->device, mmSMC_MSG_ARG_0, parameter);
@@ -231,16 +228,6 @@ int smu7_send_msg_to_smc_offset(struct pp_hwmgr *hwmgr)
return 0;
}
-int smu7_wait_for_smc_inactive(struct pp_hwmgr *hwmgr)
-{
- if (!smu7_is_smc_ram_running(hwmgr))
- return -EINVAL;
-
- PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, SMC_SYSCON_CLOCK_CNTL_0, cken, 0);
- return 0;
-}
-
-
enum cgs_ucode_id smu7_convert_fw_type_to_cgs(uint32_t fw_type)
{
enum cgs_ucode_id result = CGS_UCODE_ID_MAXIMUM;
@@ -296,11 +283,9 @@ int smu7_read_smc_sram_dword(struct pp_hwmgr *hwmgr, uint32_t smc_addr, uint32_t
result = smu7_set_smc_sram_address(hwmgr, smc_addr, limit);
- if (result)
- return result;
+ *value = result ? 0 : cgs_read_register(hwmgr->device, mmSMC_IND_DATA_11);
- *value = cgs_read_register(hwmgr->device, mmSMC_IND_DATA_11);
- return 0;
+ return result;
}
int smu7_write_smc_sram_dword(struct pp_hwmgr *hwmgr, uint32_t smc_addr, uint32_t value, uint32_t limit)
@@ -375,7 +360,7 @@ static int smu7_populate_single_firmware_entry(struct pp_hwmgr *hwmgr,
entry->meta_data_addr_low = 0;
/* digest need be excluded out */
- if (cgs_is_virtualization_enabled(hwmgr->device))
+ if (!hwmgr->not_vf)
info.image_size -= 20;
entry->data_size_byte = info.image_size;
entry->num_register_entries = 0;
@@ -409,7 +394,7 @@ int smu7_request_smu_load_fw(struct pp_hwmgr *hwmgr)
0x0);
if (hwmgr->chip_id > CHIP_TOPAZ) { /* add support for Topaz */
- if (!cgs_is_virtualization_enabled(hwmgr->device)) {
+ if (hwmgr->not_vf) {
smu7_send_msg_to_smc_with_parameter(hwmgr,
PPSMC_MSG_SMU_DRAM_ADDR_HI,
upper_32_bits(smu_data->smu_buffer.mc_addr));
@@ -467,7 +452,7 @@ int smu7_request_smu_load_fw(struct pp_hwmgr *hwmgr)
PP_ASSERT_WITH_CODE(0 == smu7_populate_single_firmware_entry(hwmgr,
UCODE_ID_SDMA1, &toc->entry[toc->num_entries++]),
"Failed to Get Firmware Entry.", return -EINVAL);
- if (cgs_is_virtualization_enabled(hwmgr->device))
+ if (!hwmgr->not_vf)
PP_ASSERT_WITH_CODE(0 == smu7_populate_single_firmware_entry(hwmgr,
UCODE_ID_MEC_STORAGE, &toc->entry[toc->num_entries++]),
"Failed to Get Firmware Entry.", return -EINVAL);
@@ -608,7 +593,7 @@ int smu7_init(struct pp_hwmgr *hwmgr)
smu_data->header = smu_data->header_buffer.kaddr;
smu_data->header_buffer.mc_addr = mc_addr;
- if (cgs_is_virtualization_enabled(hwmgr->device))
+ if (!hwmgr->not_vf)
return 0;
smu_data->smu_buffer.data_size = 200*4096;
@@ -643,13 +628,12 @@ int smu7_smu_fini(struct pp_hwmgr *hwmgr)
&smu_data->header_buffer.mc_addr,
&smu_data->header_buffer.kaddr);
- if (!cgs_is_virtualization_enabled(hwmgr->device))
+ if (hwmgr->not_vf)
amdgpu_bo_free_kernel(&smu_data->smu_buffer.handle,
&smu_data->smu_buffer.mc_addr,
&smu_data->smu_buffer.kaddr);
kfree(hwmgr->smu_backend);
hwmgr->smu_backend = NULL;
- cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU);
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h
index 126d300259ba..39c9bfda0ab4 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h
@@ -67,7 +67,6 @@ int smu7_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg,
int smu7_send_msg_to_smc_with_parameter_without_waiting(struct pp_hwmgr *hwmgr,
uint16_t msg, uint32_t parameter);
int smu7_send_msg_to_smc_offset(struct pp_hwmgr *hwmgr);
-int smu7_wait_for_smc_inactive(struct pp_hwmgr *hwmgr);
enum cgs_ucode_id smu7_convert_fw_type_to_cgs(uint32_t fw_type);
int smu7_read_smc_sram_dword(struct pp_hwmgr *hwmgr, uint32_t smc_addr,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
index c28b60aae5f8..c9837935f0f5 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
@@ -41,9 +41,11 @@ MODULE_FIRMWARE("amdgpu/polaris11_smc.bin");
MODULE_FIRMWARE("amdgpu/polaris11_smc_sk.bin");
MODULE_FIRMWARE("amdgpu/polaris11_k_smc.bin");
MODULE_FIRMWARE("amdgpu/polaris12_smc.bin");
+MODULE_FIRMWARE("amdgpu/vegam_smc.bin");
MODULE_FIRMWARE("amdgpu/vega10_smc.bin");
MODULE_FIRMWARE("amdgpu/vega10_acg_smc.bin");
MODULE_FIRMWARE("amdgpu/vega12_smc.bin");
+MODULE_FIRMWARE("amdgpu/vega20_smc.bin");
int smum_thermal_avfs_enable(struct pp_hwmgr *hwmgr)
{
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index b51d7468c3e7..782b19fc2e70 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -55,11 +55,7 @@
#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"
-
-#define VOLTAGE_SCALE 4
#define POWERTUNE_DEFAULT_SET_MAX 1
-#define VOLTAGE_VID_OFFSET_SCALE1 625
-#define VOLTAGE_VID_OFFSET_SCALE2 100
#define MC_CG_ARB_FREQ_F1 0x0b
#define VDDC_VDDCI_DELTA 200
@@ -199,8 +195,7 @@ static int tonga_start_smu(struct pp_hwmgr *hwmgr)
int result;
/* Only start SMC if SMC RAM is not running */
- if (!(smu7_is_smc_ram_running(hwmgr) ||
- cgs_is_virtualization_enabled(hwmgr->device))) {
+ if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) {
/*Check if SMU is running in protected mode*/
if (0 == PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
SMU_FIRMWARE, SMU_MODE)) {
@@ -651,7 +646,7 @@ static int tonga_populate_single_graphic_level(struct pp_hwmgr *hwmgr,
graphic_level->PowerThrottle = 0;
data->display_timing.min_clock_in_sr =
- hwmgr->display_config.min_core_set_clock_in_sr;
+ hwmgr->display_config->min_core_set_clock_in_sr;
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_SclkDeepSleep))
@@ -957,18 +952,17 @@ static int tonga_populate_single_memory_level(
SMU72_Discrete_MemoryLevel *memory_level
)
{
- uint32_t mvdd = 0;
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
struct phm_ppt_v1_information *pptable_info =
(struct phm_ppt_v1_information *)(hwmgr->pptable);
- int result = 0;
- bool dll_state_on;
- struct cgs_display_info info = {0};
uint32_t mclk_edc_wr_enable_threshold = 40000;
uint32_t mclk_stutter_mode_threshold = 30000;
uint32_t mclk_edc_enable_threshold = 40000;
uint32_t mclk_strobe_mode_threshold = 40000;
phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL;
+ int result = 0;
+ bool dll_state_on;
+ uint32_t mvdd = 0;
if (hwmgr->od_enabled)
vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_mclk;
@@ -1009,8 +1003,7 @@ static int tonga_populate_single_memory_level(
/* default set to low watermark. Highest level will be set to high later.*/
memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
- cgs_get_active_displays_info(hwmgr->device, &info);
- data->display_timing.num_existing_displays = info.display_count;
+ data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
if ((mclk_stutter_mode_threshold != 0) &&
(memory_clock <= mclk_stutter_mode_threshold) &&
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 4aafb043bcb0..e84669c448a3 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -23,7 +23,7 @@
#include "smumgr.h"
#include "vega10_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
#include "vega10_smumgr.h"
#include "vega10_hwmgr.h"
#include "vega10_ppsmc.h"
@@ -35,8 +35,6 @@
#define AVFS_EN_MSB 1568
#define AVFS_EN_LSB 1568
-#define VOLTAGE_SCALE 4
-
/* Microcode file is stored in this buffer */
#define BUFFER_SIZE 80000
#define MAX_STRING_SIZE 15
@@ -54,18 +52,13 @@
static bool vega10_is_smc_ram_running(struct pp_hwmgr *hwmgr)
{
- uint32_t mp1_fw_flags, reg;
-
- reg = soc15_get_register_offset(NBIF_HWID, 0,
- mmPCIE_INDEX2_BASE_IDX, mmPCIE_INDEX2);
+ struct amdgpu_device *adev = hwmgr->adev;
+ uint32_t mp1_fw_flags;
- cgs_write_register(hwmgr->device, reg,
+ WREG32_SOC15(NBIF, 0, mmPCIE_INDEX2,
(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff)));
- reg = soc15_get_register_offset(NBIF_HWID, 0,
- mmPCIE_DATA2_BASE_IDX, mmPCIE_DATA2);
-
- mp1_fw_flags = cgs_read_register(hwmgr->device, reg);
+ mp1_fw_flags = RREG32_SOC15(NBIF, 0, mmPCIE_DATA2);
if (mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK)
return true;
@@ -81,11 +74,11 @@ static bool vega10_is_smc_ram_running(struct pp_hwmgr *hwmgr)
*/
static uint32_t vega10_wait_for_response(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
uint32_t reg;
uint32_t ret;
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
+ reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
ret = phm_wait_for_register_unequal(hwmgr, reg,
0, MP1_C2PMSG_90__CONTENT_MASK);
@@ -93,7 +86,7 @@ static uint32_t vega10_wait_for_response(struct pp_hwmgr *hwmgr)
if (ret)
pr_err("No response from smu\n");
- return cgs_read_register(hwmgr->device, reg);
+ return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
}
/*
@@ -105,11 +98,9 @@ static uint32_t vega10_wait_for_response(struct pp_hwmgr *hwmgr)
static int vega10_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr,
uint16_t msg)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66);
- cgs_write_register(hwmgr->device, reg, msg);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
return 0;
}
@@ -122,14 +113,12 @@ static int vega10_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr,
*/
static int vega10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
uint32_t ret;
vega10_wait_for_response(hwmgr);
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
- cgs_write_register(hwmgr->device, reg, 0);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
vega10_send_msg_to_smc_without_waiting(hwmgr, msg);
@@ -150,18 +139,14 @@ static int vega10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
static int vega10_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
uint16_t msg, uint32_t parameter)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
uint32_t ret;
vega10_wait_for_response(hwmgr);
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
- cgs_write_register(hwmgr->device, reg, 0);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
- cgs_write_register(hwmgr->device, reg, parameter);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter);
vega10_send_msg_to_smc_without_waiting(hwmgr, msg);
@@ -174,12 +159,9 @@ static int vega10_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
static int vega10_get_argument(struct pp_hwmgr *hwmgr)
{
- uint32_t reg;
-
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
+ struct amdgpu_device *adev = hwmgr->adev;
- return cgs_read_register(hwmgr->device, reg);
+ return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
}
static int vega10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
index 651a3f28734b..7d9b40e8b1bf 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
@@ -23,7 +23,7 @@
#include "smumgr.h"
#include "vega12_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
#include "vega12_smumgr.h"
#include "vega12_ppsmc.h"
#include "vega12/smu9_driver_if.h"
@@ -44,18 +44,13 @@
static bool vega12_is_smc_ram_running(struct pp_hwmgr *hwmgr)
{
- uint32_t mp1_fw_flags, reg;
+ struct amdgpu_device *adev = hwmgr->adev;
+ uint32_t mp1_fw_flags;
- reg = soc15_get_register_offset(NBIF_HWID, 0,
- mmPCIE_INDEX2_BASE_IDX, mmPCIE_INDEX2);
-
- cgs_write_register(hwmgr->device, reg,
+ WREG32_SOC15(NBIF, 0, mmPCIE_INDEX2,
(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff)));
- reg = soc15_get_register_offset(NBIF_HWID, 0,
- mmPCIE_DATA2_BASE_IDX, mmPCIE_DATA2);
-
- mp1_fw_flags = cgs_read_register(hwmgr->device, reg);
+ mp1_fw_flags = RREG32_SOC15(NBIF, 0, mmPCIE_DATA2);
if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >>
MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT)
@@ -72,15 +67,15 @@ static bool vega12_is_smc_ram_running(struct pp_hwmgr *hwmgr)
*/
static uint32_t vega12_wait_for_response(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
uint32_t reg;
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
+ reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
phm_wait_for_register_unequal(hwmgr, reg,
0, MP1_C2PMSG_90__CONTENT_MASK);
- return cgs_read_register(hwmgr->device, reg);
+ return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
}
/*
@@ -92,11 +87,9 @@ static uint32_t vega12_wait_for_response(struct pp_hwmgr *hwmgr)
int vega12_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr,
uint16_t msg)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66);
- cgs_write_register(hwmgr->device, reg, msg);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
return 0;
}
@@ -109,13 +102,11 @@ int vega12_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr,
*/
int vega12_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
vega12_wait_for_response(hwmgr);
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
- cgs_write_register(hwmgr->device, reg, 0);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
vega12_send_msg_to_smc_without_waiting(hwmgr, msg);
@@ -135,17 +126,13 @@ int vega12_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
int vega12_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
uint16_t msg, uint32_t parameter)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
vega12_wait_for_response(hwmgr);
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
- cgs_write_register(hwmgr->device, reg, 0);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
- cgs_write_register(hwmgr->device, reg, parameter);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter);
vega12_send_msg_to_smc_without_waiting(hwmgr, msg);
@@ -166,11 +153,9 @@ int vega12_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
int vega12_send_msg_to_smc_with_parameter_without_waiting(
struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter)
{
- uint32_t reg;
+ struct amdgpu_device *adev = hwmgr->adev;
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66);
- cgs_write_register(hwmgr->device, reg, parameter);
+ WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, parameter);
return vega12_send_msg_to_smc_without_waiting(hwmgr, msg);
}
@@ -183,12 +168,9 @@ int vega12_send_msg_to_smc_with_parameter_without_waiting(
*/
int vega12_read_arg_from_smc(struct pp_hwmgr *hwmgr, uint32_t *arg)
{
- uint32_t reg;
-
- reg = soc15_get_register_offset(MP1_HWID, 0,
- mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
+ struct amdgpu_device *adev = hwmgr->adev;
- *arg = cgs_read_register(hwmgr->device, reg);
+ *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
new file mode 100644
index 000000000000..2de48959ac93
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
@@ -0,0 +1,2383 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "pp_debug.h"
+#include "smumgr.h"
+#include "smu_ucode_xfer_vi.h"
+#include "vegam_smumgr.h"
+#include "smu/smu_7_1_3_d.h"
+#include "smu/smu_7_1_3_sh_mask.h"
+#include "gmc/gmc_8_1_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+#include "oss/oss_3_0_d.h"
+#include "gca/gfx_8_0_d.h"
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+#include "ppatomctrl.h"
+#include "cgs_common.h"
+#include "smu7_ppsmc.h"
+
+#include "smu7_dyn_defaults.h"
+
+#include "smu7_hwmgr.h"
+#include "hardwaremanager.h"
+#include "ppatomctrl.h"
+#include "atombios.h"
+#include "pppcielanes.h"
+
+#include "dce/dce_11_2_d.h"
+#include "dce/dce_11_2_sh_mask.h"
+
+#define PPVEGAM_TARGETACTIVITY_DFLT 50
+
+#define VOLTAGE_VID_OFFSET_SCALE1 625
+#define VOLTAGE_VID_OFFSET_SCALE2 100
+#define POWERTUNE_DEFAULT_SET_MAX 1
+#define VDDC_VDDCI_DELTA 200
+#define MC_CG_ARB_FREQ_F1 0x0b
+
+#define STRAP_ASIC_RO_LSB 2168
+#define STRAP_ASIC_RO_MSB 2175
+
+#define PPSMC_MSG_ApplyAvfsCksOffVoltage ((uint16_t) 0x415)
+#define PPSMC_MSG_EnableModeSwitchRLCNotification ((uint16_t) 0x305)
+
+static const struct vegam_pt_defaults
+vegam_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = {
+ /* sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt,
+ * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT */
+ { 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000,
+ { 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61},
+ { 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } },
+};
+
+static const sclkFcwRange_t Range_Table[NUM_SCLK_RANGE] = {
+ {VCO_2_4, POSTDIV_DIV_BY_16, 75, 160, 112},
+ {VCO_3_6, POSTDIV_DIV_BY_16, 112, 224, 160},
+ {VCO_2_4, POSTDIV_DIV_BY_8, 75, 160, 112},
+ {VCO_3_6, POSTDIV_DIV_BY_8, 112, 224, 160},
+ {VCO_2_4, POSTDIV_DIV_BY_4, 75, 160, 112},
+ {VCO_3_6, POSTDIV_DIV_BY_4, 112, 216, 160},
+ {VCO_2_4, POSTDIV_DIV_BY_2, 75, 160, 108},
+ {VCO_3_6, POSTDIV_DIV_BY_2, 112, 216, 160} };
+
+static int vegam_smu_init(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data;
+
+ smu_data = kzalloc(sizeof(struct vegam_smumgr), GFP_KERNEL);
+ if (smu_data == NULL)
+ return -ENOMEM;
+
+ hwmgr->smu_backend = smu_data;
+
+ if (smu7_init(hwmgr)) {
+ kfree(smu_data);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int vegam_start_smu_in_protection_mode(struct pp_hwmgr *hwmgr)
+{
+ int result = 0;
+
+ /* Wait for smc boot up */
+ /* PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(smumgr, SMC_IND, RCU_UC_EVENTS, boot_seq_done, 0) */
+
+ /* Assert reset */
+ PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+ SMC_SYSCON_RESET_CNTL, rst_reg, 1);
+
+ result = smu7_upload_smu_firmware_image(hwmgr);
+ if (result != 0)
+ return result;
+
+ /* Clear status */
+ cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixSMU_STATUS, 0);
+
+ PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+ SMC_SYSCON_CLOCK_CNTL_0, ck_disable, 0);
+
+ /* De-assert reset */
+ PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+ SMC_SYSCON_RESET_CNTL, rst_reg, 0);
+
+
+ PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, RCU_UC_EVENTS, INTERRUPTS_ENABLED, 1);
+
+
+ /* Call Test SMU message with 0x20000 offset to trigger SMU start */
+ smu7_send_msg_to_smc_offset(hwmgr);
+
+ /* Wait done bit to be set */
+ /* Check pass/failed indicator */
+
+ PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(hwmgr, SMC_IND, SMU_STATUS, SMU_DONE, 0);
+
+ if (1 != PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+ SMU_STATUS, SMU_PASS))
+ PP_ASSERT_WITH_CODE(false, "SMU Firmware start failed!", return -1);
+
+ cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixFIRMWARE_FLAGS, 0);
+
+ PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+ SMC_SYSCON_RESET_CNTL, rst_reg, 1);
+
+ PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+ SMC_SYSCON_RESET_CNTL, rst_reg, 0);
+
+ /* Wait for firmware to initialize */
+ PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, FIRMWARE_FLAGS, INTERRUPTS_ENABLED, 1);
+
+ return result;
+}
+
+static int vegam_start_smu_in_non_protection_mode(struct pp_hwmgr *hwmgr)
+{
+ int result = 0;
+
+ /* wait for smc boot up */
+ PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(hwmgr, SMC_IND, RCU_UC_EVENTS, boot_seq_done, 0);
+
+ /* Clear firmware interrupt enable flag */
+ /* PHM_WRITE_VFPF_INDIRECT_FIELD(pSmuMgr, SMC_IND, SMC_SYSCON_MISC_CNTL, pre_fetcher_en, 1); */
+ cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+ ixFIRMWARE_FLAGS, 0);
+
+ PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+ SMC_SYSCON_RESET_CNTL,
+ rst_reg, 1);
+
+ result = smu7_upload_smu_firmware_image(hwmgr);
+ if (result != 0)
+ return result;
+
+ /* Set smc instruct start point at 0x0 */
+ smu7_program_jump_on_start(hwmgr);
+
+ PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+ SMC_SYSCON_CLOCK_CNTL_0, ck_disable, 0);
+
+ PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+ SMC_SYSCON_RESET_CNTL, rst_reg, 0);
+
+ /* Wait for firmware to initialize */
+
+ PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND,
+ FIRMWARE_FLAGS, INTERRUPTS_ENABLED, 1);
+
+ return result;
+}
+
+static int vegam_start_smu(struct pp_hwmgr *hwmgr)
+{
+ int result = 0;
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+ /* Only start SMC if SMC RAM is not running */
+ if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) {
+ smu_data->protected_mode = (uint8_t)(PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device,
+ CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_MODE));
+ smu_data->smu7_data.security_hard_key = (uint8_t)(PHM_READ_VFPF_INDIRECT_FIELD(
+ hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_SEL));
+
+ /* Check if SMU is running in protected mode */
+ if (smu_data->protected_mode == 0)
+ result = vegam_start_smu_in_non_protection_mode(hwmgr);
+ else
+ result = vegam_start_smu_in_protection_mode(hwmgr);
+
+ if (result != 0)
+ PP_ASSERT_WITH_CODE(0, "Failed to load SMU ucode.", return result);
+ }
+
+ /* Setup SoftRegsStart here for register lookup in case DummyBackEnd is used and ProcessFirmwareHeader is not executed */
+ smu7_read_smc_sram_dword(hwmgr,
+ SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU75_Firmware_Header, SoftRegisters),
+ &(smu_data->smu7_data.soft_regs_start),
+ 0x40000);
+
+ result = smu7_request_smu_load_fw(hwmgr);
+
+ return result;
+}
+
+static int vegam_process_firmware_header(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ uint32_t tmp;
+ int result;
+ bool error = false;
+
+ result = smu7_read_smc_sram_dword(hwmgr,
+ SMU7_FIRMWARE_HEADER_LOCATION +
+ offsetof(SMU75_Firmware_Header, DpmTable),
+ &tmp, SMC_RAM_END);
+
+ if (0 == result)
+ smu_data->smu7_data.dpm_table_start = tmp;
+
+ error |= (0 != result);
+
+ result = smu7_read_smc_sram_dword(hwmgr,
+ SMU7_FIRMWARE_HEADER_LOCATION +
+ offsetof(SMU75_Firmware_Header, SoftRegisters),
+ &tmp, SMC_RAM_END);
+
+ if (!result) {
+ data->soft_regs_start = tmp;
+ smu_data->smu7_data.soft_regs_start = tmp;
+ }
+
+ error |= (0 != result);
+
+ result = smu7_read_smc_sram_dword(hwmgr,
+ SMU7_FIRMWARE_HEADER_LOCATION +
+ offsetof(SMU75_Firmware_Header, mcRegisterTable),
+ &tmp, SMC_RAM_END);
+
+ if (!result)
+ smu_data->smu7_data.mc_reg_table_start = tmp;
+
+ result = smu7_read_smc_sram_dword(hwmgr,
+ SMU7_FIRMWARE_HEADER_LOCATION +
+ offsetof(SMU75_Firmware_Header, FanTable),
+ &tmp, SMC_RAM_END);
+
+ if (!result)
+ smu_data->smu7_data.fan_table_start = tmp;
+
+ error |= (0 != result);
+
+ result = smu7_read_smc_sram_dword(hwmgr,
+ SMU7_FIRMWARE_HEADER_LOCATION +
+ offsetof(SMU75_Firmware_Header, mcArbDramTimingTable),
+ &tmp, SMC_RAM_END);
+
+ if (!result)
+ smu_data->smu7_data.arb_table_start = tmp;
+
+ error |= (0 != result);
+
+ result = smu7_read_smc_sram_dword(hwmgr,
+ SMU7_FIRMWARE_HEADER_LOCATION +
+ offsetof(SMU75_Firmware_Header, Version),
+ &tmp, SMC_RAM_END);
+
+ if (!result)
+ hwmgr->microcode_version_info.SMC = tmp;
+
+ error |= (0 != result);
+
+ return error ? -1 : 0;
+}
+
+static bool vegam_is_dpm_running(struct pp_hwmgr *hwmgr)
+{
+ return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device,
+ CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON))
+ ? true : false;
+}
+
+static uint32_t vegam_get_mac_definition(uint32_t value)
+{
+ switch (value) {
+ case SMU_MAX_LEVELS_GRAPHICS:
+ return SMU75_MAX_LEVELS_GRAPHICS;
+ case SMU_MAX_LEVELS_MEMORY:
+ return SMU75_MAX_LEVELS_MEMORY;
+ case SMU_MAX_LEVELS_LINK:
+ return SMU75_MAX_LEVELS_LINK;
+ case SMU_MAX_ENTRIES_SMIO:
+ return SMU75_MAX_ENTRIES_SMIO;
+ case SMU_MAX_LEVELS_VDDC:
+ return SMU75_MAX_LEVELS_VDDC;
+ case SMU_MAX_LEVELS_VDDGFX:
+ return SMU75_MAX_LEVELS_VDDGFX;
+ case SMU_MAX_LEVELS_VDDCI:
+ return SMU75_MAX_LEVELS_VDDCI;
+ case SMU_MAX_LEVELS_MVDD:
+ return SMU75_MAX_LEVELS_MVDD;
+ case SMU_UVD_MCLK_HANDSHAKE_DISABLE:
+ return SMU7_UVD_MCLK_HANDSHAKE_DISABLE |
+ SMU7_VCE_MCLK_HANDSHAKE_DISABLE;
+ }
+
+ pr_warn("can't get the mac of %x\n", value);
+ return 0;
+}
+
+static int vegam_update_uvd_smc_table(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ uint32_t mm_boot_level_offset, mm_boot_level_value;
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+
+ smu_data->smc_state_table.UvdBootLevel = 0;
+ if (table_info->mm_dep_table->count > 0)
+ smu_data->smc_state_table.UvdBootLevel =
+ (uint8_t) (table_info->mm_dep_table->count - 1);
+ mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + offsetof(SMU75_Discrete_DpmTable,
+ UvdBootLevel);
+ mm_boot_level_offset /= 4;
+ mm_boot_level_offset *= 4;
+ mm_boot_level_value = cgs_read_ind_register(hwmgr->device,
+ CGS_IND_REG__SMC, mm_boot_level_offset);
+ mm_boot_level_value &= 0x00FFFFFF;
+ mm_boot_level_value |= smu_data->smc_state_table.UvdBootLevel << 24;
+ cgs_write_ind_register(hwmgr->device,
+ CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value);
+
+ if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_UVDDPM) ||
+ phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_StablePState))
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_UVDDPM_SetEnabledMask,
+ (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel));
+ return 0;
+}
+
+static int vegam_update_vce_smc_table(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ uint32_t mm_boot_level_offset, mm_boot_level_value;
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_StablePState))
+ smu_data->smc_state_table.VceBootLevel =
+ (uint8_t) (table_info->mm_dep_table->count - 1);
+ else
+ smu_data->smc_state_table.VceBootLevel = 0;
+
+ mm_boot_level_offset = smu_data->smu7_data.dpm_table_start +
+ offsetof(SMU75_Discrete_DpmTable, VceBootLevel);
+ mm_boot_level_offset /= 4;
+ mm_boot_level_offset *= 4;
+ mm_boot_level_value = cgs_read_ind_register(hwmgr->device,
+ CGS_IND_REG__SMC, mm_boot_level_offset);
+ mm_boot_level_value &= 0xFF00FFFF;
+ mm_boot_level_value |= smu_data->smc_state_table.VceBootLevel << 16;
+ cgs_write_ind_register(hwmgr->device,
+ CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value);
+
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_VCEDPM_SetEnabledMask,
+ (uint32_t)1 << smu_data->smc_state_table.VceBootLevel);
+ return 0;
+}
+
+static int vegam_update_samu_smc_table(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ uint32_t mm_boot_level_offset, mm_boot_level_value;
+
+
+ smu_data->smc_state_table.SamuBootLevel = 0;
+ mm_boot_level_offset = smu_data->smu7_data.dpm_table_start +
+ offsetof(SMU75_Discrete_DpmTable, SamuBootLevel);
+
+ mm_boot_level_offset /= 4;
+ mm_boot_level_offset *= 4;
+ mm_boot_level_value = cgs_read_ind_register(hwmgr->device,
+ CGS_IND_REG__SMC, mm_boot_level_offset);
+ mm_boot_level_value &= 0xFFFFFF00;
+ mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0;
+ cgs_write_ind_register(hwmgr->device,
+ CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value);
+
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_StablePState))
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SAMUDPM_SetEnabledMask,
+ (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel));
+ return 0;
+}
+
+
+static int vegam_update_bif_smc_table(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table;
+ int max_entry, i;
+
+ max_entry = (SMU75_MAX_LEVELS_LINK < pcie_table->count) ?
+ SMU75_MAX_LEVELS_LINK :
+ pcie_table->count;
+ /* Setup BIF_SCLK levels */
+ for (i = 0; i < max_entry; i++)
+ smu_data->bif_sclk_table[i] = pcie_table->entries[i].pcie_sclk;
+ return 0;
+}
+
+static int vegam_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type)
+{
+ switch (type) {
+ case SMU_UVD_TABLE:
+ vegam_update_uvd_smc_table(hwmgr);
+ break;
+ case SMU_VCE_TABLE:
+ vegam_update_vce_smc_table(hwmgr);
+ break;
+ case SMU_SAMU_TABLE:
+ vegam_update_samu_smc_table(hwmgr);
+ break;
+ case SMU_BIF_TABLE:
+ vegam_update_bif_smc_table(hwmgr);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void vegam_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+
+ if (table_info &&
+ table_info->cac_dtp_table->usPowerTuneDataSetID <= POWERTUNE_DEFAULT_SET_MAX &&
+ table_info->cac_dtp_table->usPowerTuneDataSetID)
+ smu_data->power_tune_defaults =
+ &vegam_power_tune_data_set_array
+ [table_info->cac_dtp_table->usPowerTuneDataSetID - 1];
+ else
+ smu_data->power_tune_defaults = &vegam_power_tune_data_set_array[0];
+
+}
+
+static int vegam_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
+ SMU75_Discrete_DpmTable *table)
+{
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ uint32_t count, level;
+
+ if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) {
+ count = data->mvdd_voltage_table.count;
+ if (count > SMU_MAX_SMIO_LEVELS)
+ count = SMU_MAX_SMIO_LEVELS;
+ for (level = 0; level < count; level++) {
+ table->SmioTable2.Pattern[level].Voltage = PP_HOST_TO_SMC_US(
+ data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE);
+ /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/
+ table->SmioTable2.Pattern[level].Smio =
+ (uint8_t) level;
+ table->Smio[level] |=
+ data->mvdd_voltage_table.entries[level].smio_low;
+ }
+ table->SmioMask2 = data->mvdd_voltage_table.mask_low;
+
+ table->MvddLevelCount = (uint32_t) PP_HOST_TO_SMC_UL(count);
+ }
+
+ return 0;
+}
+
+static int vegam_populate_smc_vddci_table(struct pp_hwmgr *hwmgr,
+ struct SMU75_Discrete_DpmTable *table)
+{
+ uint32_t count, level;
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+
+ count = data->vddci_voltage_table.count;
+
+ if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) {
+ if (count > SMU_MAX_SMIO_LEVELS)
+ count = SMU_MAX_SMIO_LEVELS;
+ for (level = 0; level < count; ++level) {
+ table->SmioTable1.Pattern[level].Voltage = PP_HOST_TO_SMC_US(
+ data->vddci_voltage_table.entries[level].value * VOLTAGE_SCALE);
+ table->SmioTable1.Pattern[level].Smio = (uint8_t) level;
+
+ table->Smio[level] |= data->vddci_voltage_table.entries[level].smio_low;
+ }
+ }
+
+ table->SmioMask1 = data->vddci_voltage_table.mask_low;
+
+ return 0;
+}
+
+static int vegam_populate_cac_table(struct pp_hwmgr *hwmgr,
+ struct SMU75_Discrete_DpmTable *table)
+{
+ uint32_t count;
+ uint8_t index;
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ struct phm_ppt_v1_voltage_lookup_table *lookup_table =
+ table_info->vddc_lookup_table;
+ /* tables is already swapped, so in order to use the value from it,
+ * we need to swap it back.
+ * We are populating vddc CAC data to BapmVddc table
+ * in split and merged mode
+ */
+ for (count = 0; count < lookup_table->count; count++) {
+ index = phm_get_voltage_index(lookup_table,
+ data->vddc_voltage_table.entries[count].value);
+ table->BapmVddcVidLoSidd[count] =
+ convert_to_vid(lookup_table->entries[index].us_cac_low);
+ table->BapmVddcVidHiSidd[count] =
+ convert_to_vid(lookup_table->entries[index].us_cac_mid);
+ table->BapmVddcVidHiSidd2[count] =
+ convert_to_vid(lookup_table->entries[index].us_cac_high);
+ }
+
+ return 0;
+}
+
+static int vegam_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr,
+ struct SMU75_Discrete_DpmTable *table)
+{
+ vegam_populate_smc_vddci_table(hwmgr, table);
+ vegam_populate_smc_mvdd_table(hwmgr, table);
+ vegam_populate_cac_table(hwmgr, table);
+
+ return 0;
+}
+
+static int vegam_populate_ulv_level(struct pp_hwmgr *hwmgr,
+ struct SMU75_Discrete_Ulv *state)
+{
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+
+ state->CcPwrDynRm = 0;
+ state->CcPwrDynRm1 = 0;
+
+ state->VddcOffset = (uint16_t) table_info->us_ulv_voltage_offset;
+ state->VddcOffsetVid = (uint8_t)(table_info->us_ulv_voltage_offset *
+ VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1);
+
+ state->VddcPhase = data->vddc_phase_shed_control ^ 0x3;
+
+ CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm);
+ CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1);
+ CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset);
+
+ return 0;
+}
+
+static int vegam_populate_ulv_state(struct pp_hwmgr *hwmgr,
+ struct SMU75_Discrete_DpmTable *table)
+{
+ return vegam_populate_ulv_level(hwmgr, &table->Ulv);
+}
+
+static int vegam_populate_smc_link_level(struct pp_hwmgr *hwmgr,
+ struct SMU75_Discrete_DpmTable *table)
+{
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct vegam_smumgr *smu_data =
+ (struct vegam_smumgr *)(hwmgr->smu_backend);
+ struct smu7_dpm_table *dpm_table = &data->dpm_table;
+ int i;
+
+ /* Index (dpm_table->pcie_speed_table.count)
+ * is reserved for PCIE boot level. */
+ for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) {
+ table->LinkLevel[i].PcieGenSpeed =
+ (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value;
+ table->LinkLevel[i].PcieLaneCount = (uint8_t)encode_pcie_lane_width(
+ dpm_table->pcie_speed_table.dpm_levels[i].param1);
+ table->LinkLevel[i].EnabledForActivity = 1;
+ table->LinkLevel[i].SPC = (uint8_t)(data->pcie_spc_cap & 0xff);
+ table->LinkLevel[i].DownThreshold = PP_HOST_TO_SMC_UL(5);
+ table->LinkLevel[i].UpThreshold = PP_HOST_TO_SMC_UL(30);
+ }
+
+ smu_data->smc_state_table.LinkLevelCount =
+ (uint8_t)dpm_table->pcie_speed_table.count;
+
+/* To Do move to hwmgr */
+ data->dpm_level_enable_mask.pcie_dpm_enable_mask =
+ phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table);
+
+ return 0;
+}
+
+static int vegam_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr,
+ struct phm_ppt_v1_clock_voltage_dependency_table *dep_table,
+ uint32_t clock, SMU_VoltageLevel *voltage, uint32_t *mvdd)
+{
+ uint32_t i;
+ uint16_t vddci;
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+
+ *voltage = *mvdd = 0;
+
+ /* clock - voltage dependency table is empty table */
+ if (dep_table->count == 0)
+ return -EINVAL;
+
+ for (i = 0; i < dep_table->count; i++) {
+ /* find first sclk bigger than request */
+ if (dep_table->entries[i].clk >= clock) {
+ *voltage |= (dep_table->entries[i].vddc *
+ VOLTAGE_SCALE) << VDDC_SHIFT;
+ if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control)
+ *voltage |= (data->vbios_boot_state.vddci_bootup_value *
+ VOLTAGE_SCALE) << VDDCI_SHIFT;
+ else if (dep_table->entries[i].vddci)
+ *voltage |= (dep_table->entries[i].vddci *
+ VOLTAGE_SCALE) << VDDCI_SHIFT;
+ else {
+ vddci = phm_find_closest_vddci(&(data->vddci_voltage_table),
+ (dep_table->entries[i].vddc -
+ (uint16_t)VDDC_VDDCI_DELTA));
+ *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+ }
+
+ if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control)
+ *mvdd = data->vbios_boot_state.mvdd_bootup_value *
+ VOLTAGE_SCALE;
+ else if (dep_table->entries[i].mvdd)
+ *mvdd = (uint32_t) dep_table->entries[i].mvdd *
+ VOLTAGE_SCALE;
+
+ *voltage |= 1 << PHASES_SHIFT;
+ return 0;
+ }
+ }
+
+ /* sclk is bigger than max sclk in the dependence table */
+ *voltage |= (dep_table->entries[i - 1].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
+ vddci = phm_find_closest_vddci(&(data->vddci_voltage_table),
+ (dep_table->entries[i - 1].vddc -
+ (uint16_t)VDDC_VDDCI_DELTA));
+
+ if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control)
+ *voltage |= (data->vbios_boot_state.vddci_bootup_value *
+ VOLTAGE_SCALE) << VDDCI_SHIFT;
+ else if (dep_table->entries[i - 1].vddci)
+ *voltage |= (dep_table->entries[i - 1].vddci *
+ VOLTAGE_SCALE) << VDDC_SHIFT;
+ else
+ *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+ if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control)
+ *mvdd = data->vbios_boot_state.mvdd_bootup_value * VOLTAGE_SCALE;
+ else if (dep_table->entries[i].mvdd)
+ *mvdd = (uint32_t) dep_table->entries[i - 1].mvdd * VOLTAGE_SCALE;
+
+ return 0;
+}
+
+static void vegam_get_sclk_range_table(struct pp_hwmgr *hwmgr,
+ SMU75_Discrete_DpmTable *table)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ uint32_t i, ref_clk;
+
+ struct pp_atom_ctrl_sclk_range_table range_table_from_vbios = { { {0} } };
+
+ ref_clk = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev);
+
+ if (0 == atomctrl_get_smc_sclk_range_table(hwmgr, &range_table_from_vbios)) {
+ for (i = 0; i < NUM_SCLK_RANGE; i++) {
+ table->SclkFcwRangeTable[i].vco_setting =
+ range_table_from_vbios.entry[i].ucVco_setting;
+ table->SclkFcwRangeTable[i].postdiv =
+ range_table_from_vbios.entry[i].ucPostdiv;
+ table->SclkFcwRangeTable[i].fcw_pcc =
+ range_table_from_vbios.entry[i].usFcw_pcc;
+
+ table->SclkFcwRangeTable[i].fcw_trans_upper =
+ range_table_from_vbios.entry[i].usFcw_trans_upper;
+ table->SclkFcwRangeTable[i].fcw_trans_lower =
+ range_table_from_vbios.entry[i].usRcw_trans_lower;
+
+ CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc);
+ CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper);
+ CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower);
+ }
+ return;
+ }
+
+ for (i = 0; i < NUM_SCLK_RANGE; i++) {
+ smu_data->range_table[i].trans_lower_frequency =
+ (ref_clk * Range_Table[i].fcw_trans_lower) >> Range_Table[i].postdiv;
+ smu_data->range_table[i].trans_upper_frequency =
+ (ref_clk * Range_Table[i].fcw_trans_upper) >> Range_Table[i].postdiv;
+
+ table->SclkFcwRangeTable[i].vco_setting = Range_Table[i].vco_setting;
+ table->SclkFcwRangeTable[i].postdiv = Range_Table[i].postdiv;
+ table->SclkFcwRangeTable[i].fcw_pcc = Range_Table[i].fcw_pcc;
+
+ table->SclkFcwRangeTable[i].fcw_trans_upper = Range_Table[i].fcw_trans_upper;
+ table->SclkFcwRangeTable[i].fcw_trans_lower = Range_Table[i].fcw_trans_lower;
+
+ CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc);
+ CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper);
+ CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower);
+ }
+}
+
+static int vegam_calculate_sclk_params(struct pp_hwmgr *hwmgr,
+ uint32_t clock, SMU_SclkSetting *sclk_setting)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ const SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table);
+ struct pp_atomctrl_clock_dividers_ai dividers;
+ uint32_t ref_clock;
+ uint32_t pcc_target_percent, pcc_target_freq, ss_target_percent, ss_target_freq;
+ uint8_t i;
+ int result;
+ uint64_t temp;
+
+ sclk_setting->SclkFrequency = clock;
+ /* get the engine clock dividers for this clock value */
+ result = atomctrl_get_engine_pll_dividers_ai(hwmgr, clock, &dividers);
+ if (result == 0) {
+ sclk_setting->Fcw_int = dividers.usSclk_fcw_int;
+ sclk_setting->Fcw_frac = dividers.usSclk_fcw_frac;
+ sclk_setting->Pcc_fcw_int = dividers.usPcc_fcw_int;
+ sclk_setting->PllRange = dividers.ucSclkPllRange;
+ sclk_setting->Sclk_slew_rate = 0x400;
+ sclk_setting->Pcc_up_slew_rate = dividers.usPcc_fcw_slew_frac;
+ sclk_setting->Pcc_down_slew_rate = 0xffff;
+ sclk_setting->SSc_En = dividers.ucSscEnable;
+ sclk_setting->Fcw1_int = dividers.usSsc_fcw1_int;
+ sclk_setting->Fcw1_frac = dividers.usSsc_fcw1_frac;
+ sclk_setting->Sclk_ss_slew_rate = dividers.usSsc_fcw_slew_frac;
+ return result;
+ }
+
+ ref_clock = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev);
+
+ for (i = 0; i < NUM_SCLK_RANGE; i++) {
+ if (clock > smu_data->range_table[i].trans_lower_frequency
+ && clock <= smu_data->range_table[i].trans_upper_frequency) {
+ sclk_setting->PllRange = i;
+ break;
+ }
+ }
+
+ sclk_setting->Fcw_int = (uint16_t)
+ ((clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) /
+ ref_clock);
+ temp = clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv;
+ temp <<= 0x10;
+ do_div(temp, ref_clock);
+ sclk_setting->Fcw_frac = temp & 0xffff;
+
+ pcc_target_percent = 10; /* Hardcode 10% for now. */
+ pcc_target_freq = clock - (clock * pcc_target_percent / 100);
+ sclk_setting->Pcc_fcw_int = (uint16_t)
+ ((pcc_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) /
+ ref_clock);
+
+ ss_target_percent = 2; /* Hardcode 2% for now. */
+ sclk_setting->SSc_En = 0;
+ if (ss_target_percent) {
+ sclk_setting->SSc_En = 1;
+ ss_target_freq = clock - (clock * ss_target_percent / 100);
+ sclk_setting->Fcw1_int = (uint16_t)
+ ((ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) /
+ ref_clock);
+ temp = ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv;
+ temp <<= 0x10;
+ do_div(temp, ref_clock);
+ sclk_setting->Fcw1_frac = temp & 0xffff;
+ }
+
+ return 0;
+}
+
+static uint8_t vegam_get_sleep_divider_id_from_clock(uint32_t clock,
+ uint32_t clock_insr)
+{
+ uint8_t i;
+ uint32_t temp;
+ uint32_t min = max(clock_insr, (uint32_t)SMU7_MINIMUM_ENGINE_CLOCK);
+
+ PP_ASSERT_WITH_CODE((clock >= min),
+ "Engine clock can't satisfy stutter requirement!",
+ return 0);
+ for (i = 31; ; i--) {
+ temp = clock / (i + 1);
+
+ if (temp >= min || i == 0)
+ break;
+ }
+ return i;
+}
+
+static int vegam_populate_single_graphic_level(struct pp_hwmgr *hwmgr,
+ uint32_t clock, struct SMU75_Discrete_GraphicsLevel *level)
+{
+ int result;
+ /* PP_Clocks minClocks; */
+ uint32_t mvdd;
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ SMU_SclkSetting curr_sclk_setting = { 0 };
+
+ result = vegam_calculate_sclk_params(hwmgr, clock, &curr_sclk_setting);
+
+ /* populate graphics levels */
+ result = vegam_get_dependency_volt_by_clk(hwmgr,
+ table_info->vdd_dep_on_sclk, clock,
+ &level->MinVoltage, &mvdd);
+
+ PP_ASSERT_WITH_CODE((0 == result),
+ "can not find VDDC voltage value for "
+ "VDDC engine clock dependency table",
+ return result);
+ level->ActivityLevel = (uint16_t)(SclkDPMTuning_VEGAM >> DPMTuning_Activity_Shift);
+
+ level->CcPwrDynRm = 0;
+ level->CcPwrDynRm1 = 0;
+ level->EnabledForActivity = 0;
+ level->EnabledForThrottle = 1;
+ level->VoltageDownHyst = 0;
+ level->PowerThrottle = 0;
+ data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr;
+
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep))
+ level->DeepSleepDivId = vegam_get_sleep_divider_id_from_clock(clock,
+ hwmgr->display_config->min_core_set_clock_in_sr);
+
+ level->SclkSetting = curr_sclk_setting;
+
+ CONVERT_FROM_HOST_TO_SMC_UL(level->MinVoltage);
+ CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm);
+ CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm1);
+ CONVERT_FROM_HOST_TO_SMC_US(level->ActivityLevel);
+ CONVERT_FROM_HOST_TO_SMC_UL(level->SclkSetting.SclkFrequency);
+ CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_int);
+ CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_frac);
+ CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_fcw_int);
+ CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_slew_rate);
+ CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_up_slew_rate);
+ CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_down_slew_rate);
+ CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_int);
+ CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_frac);
+ CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_ss_slew_rate);
+ return 0;
+}
+
+static int vegam_populate_all_graphic_levels(struct pp_hwmgr *hwmgr)
+{
+ struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ struct smu7_dpm_table *dpm_table = &hw_data->dpm_table;
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table;
+ uint8_t pcie_entry_cnt = (uint8_t) hw_data->dpm_table.pcie_speed_table.count;
+ int result = 0;
+ uint32_t array = smu_data->smu7_data.dpm_table_start +
+ offsetof(SMU75_Discrete_DpmTable, GraphicsLevel);
+ uint32_t array_size = sizeof(struct SMU75_Discrete_GraphicsLevel) *
+ SMU75_MAX_LEVELS_GRAPHICS;
+ struct SMU75_Discrete_GraphicsLevel *levels =
+ smu_data->smc_state_table.GraphicsLevel;
+ uint32_t i, max_entry;
+ uint8_t hightest_pcie_level_enabled = 0,
+ lowest_pcie_level_enabled = 0,
+ mid_pcie_level_enabled = 0,
+ count = 0;
+
+ vegam_get_sclk_range_table(hwmgr, &(smu_data->smc_state_table));
+
+ for (i = 0; i < dpm_table->sclk_table.count; i++) {
+
+ result = vegam_populate_single_graphic_level(hwmgr,
+ dpm_table->sclk_table.dpm_levels[i].value,
+ &(smu_data->smc_state_table.GraphicsLevel[i]));
+ if (result)
+ return result;
+
+ levels[i].UpHyst = (uint8_t)
+ (SclkDPMTuning_VEGAM >> DPMTuning_Uphyst_Shift);
+ levels[i].DownHyst = (uint8_t)
+ (SclkDPMTuning_VEGAM >> DPMTuning_Downhyst_Shift);
+ /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. */
+ if (i > 1)
+ levels[i].DeepSleepDivId = 0;
+ }
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_SPLLShutdownSupport))
+ smu_data->smc_state_table.GraphicsLevel[0].SclkSetting.SSc_En = 0;
+
+ smu_data->smc_state_table.GraphicsDpmLevelCount =
+ (uint8_t)dpm_table->sclk_table.count;
+ hw_data->dpm_level_enable_mask.sclk_dpm_enable_mask =
+ phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table);
+
+ for (i = 0; i < dpm_table->sclk_table.count; i++)
+ levels[i].EnabledForActivity =
+ (hw_data->dpm_level_enable_mask.sclk_dpm_enable_mask >> i) & 0x1;
+
+ if (pcie_table != NULL) {
+ PP_ASSERT_WITH_CODE((1 <= pcie_entry_cnt),
+ "There must be 1 or more PCIE levels defined in PPTable.",
+ return -EINVAL);
+ max_entry = pcie_entry_cnt - 1;
+ for (i = 0; i < dpm_table->sclk_table.count; i++)
+ levels[i].pcieDpmLevel =
+ (uint8_t) ((i < max_entry) ? i : max_entry);
+ } else {
+ while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask &&
+ ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask &
+ (1 << (hightest_pcie_level_enabled + 1))) != 0))
+ hightest_pcie_level_enabled++;
+
+ while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask &&
+ ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask &
+ (1 << lowest_pcie_level_enabled)) == 0))
+ lowest_pcie_level_enabled++;
+
+ while ((count < hightest_pcie_level_enabled) &&
+ ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask &
+ (1 << (lowest_pcie_level_enabled + 1 + count))) == 0))
+ count++;
+
+ mid_pcie_level_enabled = (lowest_pcie_level_enabled + 1 + count) <
+ hightest_pcie_level_enabled ?
+ (lowest_pcie_level_enabled + 1 + count) :
+ hightest_pcie_level_enabled;
+
+ /* set pcieDpmLevel to hightest_pcie_level_enabled */
+ for (i = 2; i < dpm_table->sclk_table.count; i++)
+ levels[i].pcieDpmLevel = hightest_pcie_level_enabled;
+
+ /* set pcieDpmLevel to lowest_pcie_level_enabled */
+ levels[0].pcieDpmLevel = lowest_pcie_level_enabled;
+
+ /* set pcieDpmLevel to mid_pcie_level_enabled */
+ levels[1].pcieDpmLevel = mid_pcie_level_enabled;
+ }
+ /* level count will send to smc once at init smc table and never change */
+ result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels,
+ (uint32_t)array_size, SMC_RAM_END);
+
+ return result;
+}
+
+static int vegam_calculate_mclk_params(struct pp_hwmgr *hwmgr,
+ uint32_t clock, struct SMU75_Discrete_MemoryLevel *mem_level)
+{
+ struct pp_atomctrl_memory_clock_param_ai mpll_param;
+
+ PP_ASSERT_WITH_CODE(!atomctrl_get_memory_pll_dividers_ai(hwmgr,
+ clock, &mpll_param),
+ "Failed to retrieve memory pll parameter.",
+ return -EINVAL);
+
+ mem_level->MclkFrequency = (uint32_t)mpll_param.ulClock;
+ mem_level->Fcw_int = (uint16_t)mpll_param.ulMclk_fcw_int;
+ mem_level->Fcw_frac = (uint16_t)mpll_param.ulMclk_fcw_frac;
+ mem_level->Postdiv = (uint8_t)mpll_param.ulPostDiv;
+
+ return 0;
+}
+
+static int vegam_populate_single_memory_level(struct pp_hwmgr *hwmgr,
+ uint32_t clock, struct SMU75_Discrete_MemoryLevel *mem_level)
+{
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ int result = 0;
+ uint32_t mclk_stutter_mode_threshold = 60000;
+
+
+ if (table_info->vdd_dep_on_mclk) {
+ result = vegam_get_dependency_volt_by_clk(hwmgr,
+ table_info->vdd_dep_on_mclk, clock,
+ &mem_level->MinVoltage, &mem_level->MinMvdd);
+ PP_ASSERT_WITH_CODE(!result,
+ "can not find MinVddc voltage value from memory "
+ "VDDC voltage dependency table", return result);
+ }
+
+ result = vegam_calculate_mclk_params(hwmgr, clock, mem_level);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to calculate mclk params.",
+ return -EINVAL);
+
+ mem_level->EnabledForThrottle = 1;
+ mem_level->EnabledForActivity = 0;
+ mem_level->VoltageDownHyst = 0;
+ mem_level->ActivityLevel = (uint16_t)
+ (MemoryDPMTuning_VEGAM >> DPMTuning_Activity_Shift);
+ mem_level->StutterEnable = false;
+ mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
+
+ data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
+
+ if (mclk_stutter_mode_threshold &&
+ (clock <= mclk_stutter_mode_threshold) &&
+ (PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL,
+ STUTTER_ENABLE) & 0x1))
+ mem_level->StutterEnable = true;
+
+ if (!result) {
+ CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinMvdd);
+ CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MclkFrequency);
+ CONVERT_FROM_HOST_TO_SMC_US(mem_level->Fcw_int);
+ CONVERT_FROM_HOST_TO_SMC_US(mem_level->Fcw_frac);
+ CONVERT_FROM_HOST_TO_SMC_US(mem_level->ActivityLevel);
+ CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinVoltage);
+ }
+
+ return result;
+}
+
+static int vegam_populate_all_memory_levels(struct pp_hwmgr *hwmgr)
+{
+ struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ struct smu7_dpm_table *dpm_table = &hw_data->dpm_table;
+ int result;
+ /* populate MCLK dpm table to SMU7 */
+ uint32_t array = smu_data->smu7_data.dpm_table_start +
+ offsetof(SMU75_Discrete_DpmTable, MemoryLevel);
+ uint32_t array_size = sizeof(SMU75_Discrete_MemoryLevel) *
+ SMU75_MAX_LEVELS_MEMORY;
+ struct SMU75_Discrete_MemoryLevel *levels =
+ smu_data->smc_state_table.MemoryLevel;
+ uint32_t i;
+
+ for (i = 0; i < dpm_table->mclk_table.count; i++) {
+ PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value),
+ "can not populate memory level as memory clock is zero",
+ return -EINVAL);
+ result = vegam_populate_single_memory_level(hwmgr,
+ dpm_table->mclk_table.dpm_levels[i].value,
+ &levels[i]);
+
+ if (result)
+ return result;
+
+ levels[i].UpHyst = (uint8_t)
+ (MemoryDPMTuning_VEGAM >> DPMTuning_Uphyst_Shift);
+ levels[i].DownHyst = (uint8_t)
+ (MemoryDPMTuning_VEGAM >> DPMTuning_Downhyst_Shift);
+ }
+
+ smu_data->smc_state_table.MemoryDpmLevelCount =
+ (uint8_t)dpm_table->mclk_table.count;
+ hw_data->dpm_level_enable_mask.mclk_dpm_enable_mask =
+ phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table);
+
+ for (i = 0; i < dpm_table->mclk_table.count; i++)
+ levels[i].EnabledForActivity =
+ (hw_data->dpm_level_enable_mask.mclk_dpm_enable_mask >> i) & 0x1;
+
+ levels[dpm_table->mclk_table.count - 1].DisplayWatermark =
+ PPSMC_DISPLAY_WATERMARK_HIGH;
+
+ /* level count will send to smc once at init smc table and never change */
+ result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels,
+ (uint32_t)array_size, SMC_RAM_END);
+
+ return result;
+}
+
+static int vegam_populate_mvdd_value(struct pp_hwmgr *hwmgr,
+ uint32_t mclk, SMIO_Pattern *smio_pat)
+{
+ const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ uint32_t i = 0;
+
+ if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) {
+ /* find mvdd value which clock is more than request */
+ for (i = 0; i < table_info->vdd_dep_on_mclk->count; i++) {
+ if (mclk <= table_info->vdd_dep_on_mclk->entries[i].clk) {
+ smio_pat->Voltage = data->mvdd_voltage_table.entries[i].value;
+ break;
+ }
+ }
+ PP_ASSERT_WITH_CODE(i < table_info->vdd_dep_on_mclk->count,
+ "MVDD Voltage is outside the supported range.",
+ return -EINVAL);
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int vegam_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
+ SMU75_Discrete_DpmTable *table)
+{
+ int result = 0;
+ uint32_t sclk_frequency;
+ const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ SMIO_Pattern vol_level;
+ uint32_t mvdd;
+ uint16_t us_mvdd;
+
+ table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC;
+
+ /* Get MinVoltage and Frequency from DPM0,
+ * already converted to SMC_UL */
+ sclk_frequency = data->vbios_boot_state.sclk_bootup_value;
+ result = vegam_get_dependency_volt_by_clk(hwmgr,
+ table_info->vdd_dep_on_sclk,
+ sclk_frequency,
+ &table->ACPILevel.MinVoltage, &mvdd);
+ PP_ASSERT_WITH_CODE(!result,
+ "Cannot find ACPI VDDC voltage value "
+ "in Clock Dependency Table",
+ );
+
+ result = vegam_calculate_sclk_params(hwmgr, sclk_frequency,
+ &(table->ACPILevel.SclkSetting));
+ PP_ASSERT_WITH_CODE(!result,
+ "Error retrieving Engine Clock dividers from VBIOS.",
+ return result);
+
+ table->ACPILevel.DeepSleepDivId = 0;
+ table->ACPILevel.CcPwrDynRm = 0;
+ table->ACPILevel.CcPwrDynRm1 = 0;
+
+ CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.MinVoltage);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1);
+
+ CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkSetting.SclkFrequency);
+ CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_int);
+ CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_frac);
+ CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_fcw_int);
+ CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_slew_rate);
+ CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_up_slew_rate);
+ CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_down_slew_rate);
+ CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_int);
+ CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac);
+ CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate);
+
+
+ /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */
+ table->MemoryACPILevel.MclkFrequency = data->vbios_boot_state.mclk_bootup_value;
+ result = vegam_get_dependency_volt_by_clk(hwmgr,
+ table_info->vdd_dep_on_mclk,
+ table->MemoryACPILevel.MclkFrequency,
+ &table->MemoryACPILevel.MinVoltage, &mvdd);
+ PP_ASSERT_WITH_CODE((0 == result),
+ "Cannot find ACPI VDDCI voltage value "
+ "in Clock Dependency Table",
+ );
+
+ us_mvdd = 0;
+ if ((SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) ||
+ (data->mclk_dpm_key_disabled))
+ us_mvdd = data->vbios_boot_state.mvdd_bootup_value;
+ else {
+ if (!vegam_populate_mvdd_value(hwmgr,
+ data->dpm_table.mclk_table.dpm_levels[0].value,
+ &vol_level))
+ us_mvdd = vol_level.Voltage;
+ }
+
+ if (!vegam_populate_mvdd_value(hwmgr, 0, &vol_level))
+ table->MemoryACPILevel.MinMvdd = PP_HOST_TO_SMC_UL(vol_level.Voltage);
+ else
+ table->MemoryACPILevel.MinMvdd = 0;
+
+ table->MemoryACPILevel.StutterEnable = false;
+
+ table->MemoryACPILevel.EnabledForThrottle = 0;
+ table->MemoryACPILevel.EnabledForActivity = 0;
+ table->MemoryACPILevel.UpHyst = 0;
+ table->MemoryACPILevel.DownHyst = 100;
+ table->MemoryACPILevel.VoltageDownHyst = 0;
+ table->MemoryACPILevel.ActivityLevel =
+ PP_HOST_TO_SMC_US(data->current_profile_setting.mclk_activity);
+
+ CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MclkFrequency);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage);
+
+ return result;
+}
+
+static int vegam_populate_smc_vce_level(struct pp_hwmgr *hwmgr,
+ SMU75_Discrete_DpmTable *table)
+{
+ int result = -EINVAL;
+ uint8_t count;
+ struct pp_atomctrl_clock_dividers_vi dividers;
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
+ table_info->mm_dep_table;
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ uint32_t vddci;
+
+ table->VceLevelCount = (uint8_t)(mm_table->count);
+ table->VceBootLevel = 0;
+
+ for (count = 0; count < table->VceLevelCount; count++) {
+ table->VceLevel[count].Frequency = mm_table->entries[count].eclk;
+ table->VceLevel[count].MinVoltage = 0;
+ table->VceLevel[count].MinVoltage |=
+ (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
+
+ if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+ vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+ mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+ else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+ vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+ else
+ vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+
+ table->VceLevel[count].MinVoltage |=
+ (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+ table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
+
+ /*retrieve divider value for VBIOS */
+ result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+ table->VceLevel[count].Frequency, &dividers);
+ PP_ASSERT_WITH_CODE((0 == result),
+ "can not find divide id for VCE engine clock",
+ return result);
+
+ table->VceLevel[count].Divider = (uint8_t)dividers.pll_post_divider;
+
+ CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].Frequency);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].MinVoltage);
+ }
+ return result;
+}
+
+static int vegam_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
+ SMU75_Discrete_DpmTable *table)
+{
+ int result = -EINVAL;
+ uint8_t count;
+ struct pp_atomctrl_clock_dividers_vi dividers;
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
+ table_info->mm_dep_table;
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ uint32_t vddci;
+
+ table->SamuBootLevel = 0;
+ table->SamuLevelCount = (uint8_t)(mm_table->count);
+
+ for (count = 0; count < table->SamuLevelCount; count++) {
+ /* not sure whether we need evclk or not */
+ table->SamuLevel[count].MinVoltage = 0;
+ table->SamuLevel[count].Frequency = mm_table->entries[count].samclock;
+ table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
+ VOLTAGE_SCALE) << VDDC_SHIFT;
+
+ if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+ vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+ mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+ else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+ vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+ else
+ vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+ table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+ table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
+
+ /* retrieve divider value for VBIOS */
+ result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+ table->SamuLevel[count].Frequency, &dividers);
+ PP_ASSERT_WITH_CODE((0 == result),
+ "can not find divide id for samu clock", return result);
+
+ table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider;
+
+ CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].MinVoltage);
+ }
+ return result;
+}
+
+static int vegam_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr,
+ int32_t eng_clock, int32_t mem_clock,
+ SMU75_Discrete_MCArbDramTimingTableEntry *arb_regs)
+{
+ uint32_t dram_timing;
+ uint32_t dram_timing2;
+ uint32_t burst_time;
+ uint32_t rfsh_rate;
+ uint32_t misc3;
+
+ int result;
+
+ result = atomctrl_set_engine_dram_timings_rv770(hwmgr,
+ eng_clock, mem_clock);
+ PP_ASSERT_WITH_CODE(result == 0,
+ "Error calling VBIOS to set DRAM_TIMING.",
+ return result);
+
+ dram_timing = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING);
+ dram_timing2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2);
+ burst_time = cgs_read_register(hwmgr->device, mmMC_ARB_BURST_TIME);
+ rfsh_rate = cgs_read_register(hwmgr->device, mmMC_ARB_RFSH_RATE);
+ misc3 = cgs_read_register(hwmgr->device, mmMC_ARB_MISC3);
+
+ arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dram_timing);
+ arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dram_timing2);
+ arb_regs->McArbBurstTime = PP_HOST_TO_SMC_UL(burst_time);
+ arb_regs->McArbRfshRate = PP_HOST_TO_SMC_UL(rfsh_rate);
+ arb_regs->McArbMisc3 = PP_HOST_TO_SMC_UL(misc3);
+
+ return 0;
+}
+
+static int vegam_program_memory_timing_parameters(struct pp_hwmgr *hwmgr)
+{
+ struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ struct SMU75_Discrete_MCArbDramTimingTable arb_regs;
+ uint32_t i, j;
+ int result = 0;
+
+ memset(&arb_regs, 0, sizeof(SMU75_Discrete_MCArbDramTimingTable));
+
+ for (i = 0; i < hw_data->dpm_table.sclk_table.count; i++) {
+ for (j = 0; j < hw_data->dpm_table.mclk_table.count; j++) {
+ result = vegam_populate_memory_timing_parameters(hwmgr,
+ hw_data->dpm_table.sclk_table.dpm_levels[i].value,
+ hw_data->dpm_table.mclk_table.dpm_levels[j].value,
+ &arb_regs.entries[i][j]);
+ if (result)
+ return result;
+ }
+ }
+
+ result = smu7_copy_bytes_to_smc(
+ hwmgr,
+ smu_data->smu7_data.arb_table_start,
+ (uint8_t *)&arb_regs,
+ sizeof(SMU75_Discrete_MCArbDramTimingTable),
+ SMC_RAM_END);
+ return result;
+}
+
+static int vegam_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
+ struct SMU75_Discrete_DpmTable *table)
+{
+ int result = -EINVAL;
+ uint8_t count;
+ struct pp_atomctrl_clock_dividers_vi dividers;
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
+ table_info->mm_dep_table;
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ uint32_t vddci;
+
+ table->UvdLevelCount = (uint8_t)(mm_table->count);
+ table->UvdBootLevel = 0;
+
+ for (count = 0; count < table->UvdLevelCount; count++) {
+ table->UvdLevel[count].MinVoltage = 0;
+ table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk;
+ table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk;
+ table->UvdLevel[count].MinVoltage |=
+ (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
+
+ if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+ vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+ mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+ else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+ vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+ else
+ vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+ table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+ table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
+
+ /* retrieve divider value for VBIOS */
+ result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+ table->UvdLevel[count].VclkFrequency, &dividers);
+ PP_ASSERT_WITH_CODE((0 == result),
+ "can not find divide id for Vclk clock", return result);
+
+ table->UvdLevel[count].VclkDivider = (uint8_t)dividers.pll_post_divider;
+
+ result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+ table->UvdLevel[count].DclkFrequency, &dividers);
+ PP_ASSERT_WITH_CODE((0 == result),
+ "can not find divide id for Dclk clock", return result);
+
+ table->UvdLevel[count].DclkDivider = (uint8_t)dividers.pll_post_divider;
+
+ CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage);
+ }
+
+ return result;
+}
+
+static int vegam_populate_smc_boot_level(struct pp_hwmgr *hwmgr,
+ struct SMU75_Discrete_DpmTable *table)
+{
+ int result = 0;
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+
+ table->GraphicsBootLevel = 0;
+ table->MemoryBootLevel = 0;
+
+ /* find boot level from dpm table */
+ result = phm_find_boot_level(&(data->dpm_table.sclk_table),
+ data->vbios_boot_state.sclk_bootup_value,
+ (uint32_t *)&(table->GraphicsBootLevel));
+
+ result = phm_find_boot_level(&(data->dpm_table.mclk_table),
+ data->vbios_boot_state.mclk_bootup_value,
+ (uint32_t *)&(table->MemoryBootLevel));
+
+ table->BootVddc = data->vbios_boot_state.vddc_bootup_value *
+ VOLTAGE_SCALE;
+ table->BootVddci = data->vbios_boot_state.vddci_bootup_value *
+ VOLTAGE_SCALE;
+ table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value *
+ VOLTAGE_SCALE;
+
+ CONVERT_FROM_HOST_TO_SMC_US(table->BootVddc);
+ CONVERT_FROM_HOST_TO_SMC_US(table->BootVddci);
+ CONVERT_FROM_HOST_TO_SMC_US(table->BootMVdd);
+
+ return 0;
+}
+
+static int vegam_populate_smc_initial_state(struct pp_hwmgr *hwmgr)
+{
+ struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ uint8_t count, level;
+
+ count = (uint8_t)(table_info->vdd_dep_on_sclk->count);
+
+ for (level = 0; level < count; level++) {
+ if (table_info->vdd_dep_on_sclk->entries[level].clk >=
+ hw_data->vbios_boot_state.sclk_bootup_value) {
+ smu_data->smc_state_table.GraphicsBootLevel = level;
+ break;
+ }
+ }
+
+ count = (uint8_t)(table_info->vdd_dep_on_mclk->count);
+ for (level = 0; level < count; level++) {
+ if (table_info->vdd_dep_on_mclk->entries[level].clk >=
+ hw_data->vbios_boot_state.mclk_bootup_value) {
+ smu_data->smc_state_table.MemoryBootLevel = level;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static uint16_t scale_fan_gain_settings(uint16_t raw_setting)
+{
+ uint32_t tmp;
+ tmp = raw_setting * 4096 / 100;
+ return (uint16_t)tmp;
+}
+
+static int vegam_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+ const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults;
+ SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ struct phm_cac_tdp_table *cac_dtp_table = table_info->cac_dtp_table;
+ struct pp_advance_fan_control_parameters *fan_table =
+ &hwmgr->thermal_controller.advanceFanControlParameters;
+ int i, j, k;
+ const uint16_t *pdef1;
+ const uint16_t *pdef2;
+
+ table->DefaultTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128));
+ table->TargetTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128));
+
+ PP_ASSERT_WITH_CODE(cac_dtp_table->usTargetOperatingTemp <= 255,
+ "Target Operating Temp is out of Range!",
+ );
+
+ table->TemperatureLimitEdge = PP_HOST_TO_SMC_US(
+ cac_dtp_table->usTargetOperatingTemp * 256);
+ table->TemperatureLimitHotspot = PP_HOST_TO_SMC_US(
+ cac_dtp_table->usTemperatureLimitHotspot * 256);
+ table->FanGainEdge = PP_HOST_TO_SMC_US(
+ scale_fan_gain_settings(fan_table->usFanGainEdge));
+ table->FanGainHotspot = PP_HOST_TO_SMC_US(
+ scale_fan_gain_settings(fan_table->usFanGainHotspot));
+
+ pdef1 = defaults->BAPMTI_R;
+ pdef2 = defaults->BAPMTI_RC;
+
+ for (i = 0; i < SMU75_DTE_ITERATIONS; i++) {
+ for (j = 0; j < SMU75_DTE_SOURCES; j++) {
+ for (k = 0; k < SMU75_DTE_SINKS; k++) {
+ table->BAPMTI_R[i][j][k] = PP_HOST_TO_SMC_US(*pdef1);
+ table->BAPMTI_RC[i][j][k] = PP_HOST_TO_SMC_US(*pdef2);
+ pdef1++;
+ pdef2++;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int vegam_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
+{
+ uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min;
+ struct vegam_smumgr *smu_data =
+ (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+ uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0;
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
+ table_info->vdd_dep_on_sclk;
+ uint32_t mask = (1 << ((STRAP_ASIC_RO_MSB - STRAP_ASIC_RO_LSB) + 1)) - 1;
+
+ stretch_amount = (uint8_t)table_info->cac_dtp_table->usClockStretchAmount;
+
+ atomctrl_read_efuse(hwmgr, STRAP_ASIC_RO_LSB, STRAP_ASIC_RO_MSB,
+ mask, &efuse);
+
+ min = 1200;
+ max = 2500;
+
+ ro = efuse * (max - min) / 255 + min;
+
+ /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */
+ for (i = 0; i < sclk_table->count; i++) {
+ smu_data->smc_state_table.Sclk_CKS_masterEn0_7 |=
+ sclk_table->entries[i].cks_enable << i;
+ volt_without_cks = (uint32_t)((2753594000U + (sclk_table->entries[i].clk/100) *
+ 136418 - (ro - 70) * 1000000) /
+ (2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000));
+ volt_with_cks = (uint32_t)((2797202000U + sclk_table->entries[i].clk/100 *
+ 3232 - (ro - 65) * 1000000) /
+ (2522480 - sclk_table->entries[i].clk/100 * 115764/100));
+
+ if (volt_without_cks >= volt_with_cks)
+ volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks +
+ sclk_table->entries[i].cks_voffset) * 100 + 624) / 625);
+
+ smu_data->smc_state_table.Sclk_voltageOffset[i] = volt_offset;
+ }
+
+ smu_data->smc_state_table.LdoRefSel =
+ (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ?
+ table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 5;
+ /* Populate CKS Lookup Table */
+ if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5)
+ stretch_amount2 = 0;
+ else if (stretch_amount == 3 || stretch_amount == 4)
+ stretch_amount2 = 1;
+ else {
+ phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_ClockStretcher);
+ PP_ASSERT_WITH_CODE(false,
+ "Stretch Amount in PPTable not supported\n",
+ return -EINVAL);
+ }
+
+ value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL);
+ value &= 0xFFFFFFFE;
+ cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value);
+
+ return 0;
+}
+
+static bool vegam_is_hw_avfs_present(struct pp_hwmgr *hwmgr)
+{
+ uint32_t efuse;
+
+ efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+ ixSMU_EFUSE_0 + (49 * 4));
+ efuse &= 0x00000001;
+
+ if (efuse)
+ return true;
+
+ return false;
+}
+
+static int vegam_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
+{
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+ SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table);
+ int result = 0;
+ struct pp_atom_ctrl__avfs_parameters avfs_params = {0};
+ AVFS_meanNsigma_t AVFS_meanNsigma = { {0} };
+ AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} };
+ uint32_t tmp, i;
+
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)hwmgr->pptable;
+ struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
+ table_info->vdd_dep_on_sclk;
+
+ if (!hwmgr->avfs_supported)
+ return 0;
+
+ result = atomctrl_get_avfs_information(hwmgr, &avfs_params);
+
+ if (0 == result) {
+ table->BTCGB_VDROOP_TABLE[0].a0 =
+ PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0);
+ table->BTCGB_VDROOP_TABLE[0].a1 =
+ PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1);
+ table->BTCGB_VDROOP_TABLE[0].a2 =
+ PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2);
+ table->BTCGB_VDROOP_TABLE[1].a0 =
+ PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0);
+ table->BTCGB_VDROOP_TABLE[1].a1 =
+ PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1);
+ table->BTCGB_VDROOP_TABLE[1].a2 =
+ PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2);
+ table->AVFSGB_FUSE_TABLE[0].m1 =
+ PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1);
+ table->AVFSGB_FUSE_TABLE[0].m2 =
+ PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2);
+ table->AVFSGB_FUSE_TABLE[0].b =
+ PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b);
+ table->AVFSGB_FUSE_TABLE[0].m1_shift = 24;
+ table->AVFSGB_FUSE_TABLE[0].m2_shift = 12;
+ table->AVFSGB_FUSE_TABLE[1].m1 =
+ PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1);
+ table->AVFSGB_FUSE_TABLE[1].m2 =
+ PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2);
+ table->AVFSGB_FUSE_TABLE[1].b =
+ PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b);
+ table->AVFSGB_FUSE_TABLE[1].m1_shift = 24;
+ table->AVFSGB_FUSE_TABLE[1].m2_shift = 12;
+ table->MaxVoltage = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv);
+ AVFS_meanNsigma.Aconstant[0] =
+ PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0);
+ AVFS_meanNsigma.Aconstant[1] =
+ PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1);
+ AVFS_meanNsigma.Aconstant[2] =
+ PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2);
+ AVFS_meanNsigma.DC_tol_sigma =
+ PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma);
+ AVFS_meanNsigma.Platform_mean =
+ PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean);
+ AVFS_meanNsigma.PSM_Age_CompFactor =
+ PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor);
+ AVFS_meanNsigma.Platform_sigma =
+ PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma);
+
+ for (i = 0; i < sclk_table->count; i++) {
+ AVFS_meanNsigma.Static_Voltage_Offset[i] =
+ (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625);
+ AVFS_SclkOffset.Sclk_Offset[i] =
+ PP_HOST_TO_SMC_US((uint16_t)
+ (sclk_table->entries[i].sclk_offset) / 100);
+ }
+
+ result = smu7_read_smc_sram_dword(hwmgr,
+ SMU7_FIRMWARE_HEADER_LOCATION +
+ offsetof(SMU75_Firmware_Header, AvfsMeanNSigma),
+ &tmp, SMC_RAM_END);
+ smu7_copy_bytes_to_smc(hwmgr,
+ tmp,
+ (uint8_t *)&AVFS_meanNsigma,
+ sizeof(AVFS_meanNsigma_t),
+ SMC_RAM_END);
+
+ result = smu7_read_smc_sram_dword(hwmgr,
+ SMU7_FIRMWARE_HEADER_LOCATION +
+ offsetof(SMU75_Firmware_Header, AvfsSclkOffsetTable),
+ &tmp, SMC_RAM_END);
+ smu7_copy_bytes_to_smc(hwmgr,
+ tmp,
+ (uint8_t *)&AVFS_SclkOffset,
+ sizeof(AVFS_Sclk_Offset_t),
+ SMC_RAM_END);
+
+ data->avfs_vdroop_override_setting =
+ (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) |
+ (avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) |
+ (avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) |
+ (avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT);
+ data->apply_avfs_cks_off_voltage =
+ (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? true : false;
+ }
+ return result;
+}
+
+static int vegam_populate_vr_config(struct pp_hwmgr *hwmgr,
+ struct SMU75_Discrete_DpmTable *table)
+{
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct vegam_smumgr *smu_data =
+ (struct vegam_smumgr *)(hwmgr->smu_backend);
+ uint16_t config;
+
+ config = VR_MERGED_WITH_VDDC;
+ table->VRConfig |= (config << VRCONF_VDDGFX_SHIFT);
+
+ /* Set Vddc Voltage Controller */
+ if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) {
+ config = VR_SVI2_PLANE_1;
+ table->VRConfig |= config;
+ } else {
+ PP_ASSERT_WITH_CODE(false,
+ "VDDC should be on SVI2 control in merged mode!",
+ );
+ }
+ /* Set Vddci Voltage Controller */
+ if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) {
+ config = VR_SVI2_PLANE_2; /* only in merged mode */
+ table->VRConfig |= (config << VRCONF_VDDCI_SHIFT);
+ } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) {
+ config = VR_SMIO_PATTERN_1;
+ table->VRConfig |= (config << VRCONF_VDDCI_SHIFT);
+ } else {
+ config = VR_STATIC_VOLTAGE;
+ table->VRConfig |= (config << VRCONF_VDDCI_SHIFT);
+ }
+ /* Set Mvdd Voltage Controller */
+ if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) {
+ if (config != VR_SVI2_PLANE_2) {
+ config = VR_SVI2_PLANE_2;
+ table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
+ cgs_write_ind_register(hwmgr->device,
+ CGS_IND_REG__SMC,
+ smu_data->smu7_data.soft_regs_start +
+ offsetof(SMU75_SoftRegisters, AllowMvddSwitch),
+ 0x1);
+ } else {
+ PP_ASSERT_WITH_CODE(false,
+ "SVI2 Plane 2 is already taken, set MVDD as Static",);
+ config = VR_STATIC_VOLTAGE;
+ table->VRConfig = (config << VRCONF_MVDD_SHIFT);
+ }
+ } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) {
+ config = VR_SMIO_PATTERN_2;
+ table->VRConfig = (config << VRCONF_MVDD_SHIFT);
+ cgs_write_ind_register(hwmgr->device,
+ CGS_IND_REG__SMC,
+ smu_data->smu7_data.soft_regs_start +
+ offsetof(SMU75_SoftRegisters, AllowMvddSwitch),
+ 0x1);
+ } else {
+ config = VR_STATIC_VOLTAGE;
+ table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
+ }
+
+ return 0;
+}
+
+static int vegam_populate_svi_load_line(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults;
+
+ smu_data->power_tune_table.SviLoadLineEn = defaults->SviLoadLineEn;
+ smu_data->power_tune_table.SviLoadLineVddC = defaults->SviLoadLineVddC;
+ smu_data->power_tune_table.SviLoadLineTrimVddC = 3;
+ smu_data->power_tune_table.SviLoadLineOffsetVddC = 0;
+
+ return 0;
+}
+
+static int vegam_populate_tdc_limit(struct pp_hwmgr *hwmgr)
+{
+ uint16_t tdc_limit;
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults;
+
+ tdc_limit = (uint16_t)(table_info->cac_dtp_table->usTDC * 128);
+ smu_data->power_tune_table.TDC_VDDC_PkgLimit =
+ CONVERT_FROM_HOST_TO_SMC_US(tdc_limit);
+ smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc =
+ defaults->TDC_VDDC_ThrottleReleaseLimitPerc;
+ smu_data->power_tune_table.TDC_MAWt = defaults->TDC_MAWt;
+
+ return 0;
+}
+
+static int vegam_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults;
+ uint32_t temp;
+
+ if (smu7_read_smc_sram_dword(hwmgr,
+ fuse_table_offset +
+ offsetof(SMU75_Discrete_PmFuses, TdcWaterfallCtl),
+ (uint32_t *)&temp, SMC_RAM_END))
+ PP_ASSERT_WITH_CODE(false,
+ "Attempt to read PmFuses.DW6 (SviLoadLineEn) from SMC Failed!",
+ return -EINVAL);
+ else {
+ smu_data->power_tune_table.TdcWaterfallCtl = defaults->TdcWaterfallCtl;
+ smu_data->power_tune_table.LPMLTemperatureMin =
+ (uint8_t)((temp >> 16) & 0xff);
+ smu_data->power_tune_table.LPMLTemperatureMax =
+ (uint8_t)((temp >> 8) & 0xff);
+ smu_data->power_tune_table.Reserved = (uint8_t)(temp & 0xff);
+ }
+ return 0;
+}
+
+static int vegam_populate_temperature_scaler(struct pp_hwmgr *hwmgr)
+{
+ int i;
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+ /* Currently not used. Set all to zero. */
+ for (i = 0; i < 16; i++)
+ smu_data->power_tune_table.LPMLTemperatureScaler[i] = 0;
+
+ return 0;
+}
+
+static int vegam_populate_fuzzy_fan(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+/* TO DO move to hwmgr */
+ if ((hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity & (1 << 15))
+ || 0 == hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity)
+ hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity =
+ hwmgr->thermal_controller.advanceFanControlParameters.usDefaultFanOutputSensitivity;
+
+ smu_data->power_tune_table.FuzzyFan_PwmSetDelta = PP_HOST_TO_SMC_US(
+ hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity);
+ return 0;
+}
+
+static int vegam_populate_gnb_lpml(struct pp_hwmgr *hwmgr)
+{
+ int i;
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+ /* Currently not used. Set all to zero. */
+ for (i = 0; i < 16; i++)
+ smu_data->power_tune_table.GnbLPML[i] = 0;
+
+ return 0;
+}
+
+static int vegam_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ uint16_t hi_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd;
+ uint16_t lo_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd;
+ struct phm_cac_tdp_table *cac_table = table_info->cac_dtp_table;
+
+ hi_sidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256);
+ lo_sidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256);
+
+ smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd =
+ CONVERT_FROM_HOST_TO_SMC_US(hi_sidd);
+ smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd =
+ CONVERT_FROM_HOST_TO_SMC_US(lo_sidd);
+
+ return 0;
+}
+
+static int vegam_populate_pm_fuses(struct pp_hwmgr *hwmgr)
+{
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+ uint32_t pm_fuse_table_offset;
+
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_PowerContainment)) {
+ if (smu7_read_smc_sram_dword(hwmgr,
+ SMU7_FIRMWARE_HEADER_LOCATION +
+ offsetof(SMU75_Firmware_Header, PmFuseTable),
+ &pm_fuse_table_offset, SMC_RAM_END))
+ PP_ASSERT_WITH_CODE(false,
+ "Attempt to get pm_fuse_table_offset Failed!",
+ return -EINVAL);
+
+ if (vegam_populate_svi_load_line(hwmgr))
+ PP_ASSERT_WITH_CODE(false,
+ "Attempt to populate SviLoadLine Failed!",
+ return -EINVAL);
+
+ if (vegam_populate_tdc_limit(hwmgr))
+ PP_ASSERT_WITH_CODE(false,
+ "Attempt to populate TDCLimit Failed!", return -EINVAL);
+
+ if (vegam_populate_dw8(hwmgr, pm_fuse_table_offset))
+ PP_ASSERT_WITH_CODE(false,
+ "Attempt to populate TdcWaterfallCtl, "
+ "LPMLTemperature Min and Max Failed!",
+ return -EINVAL);
+
+ if (0 != vegam_populate_temperature_scaler(hwmgr))
+ PP_ASSERT_WITH_CODE(false,
+ "Attempt to populate LPMLTemperatureScaler Failed!",
+ return -EINVAL);
+
+ if (vegam_populate_fuzzy_fan(hwmgr))
+ PP_ASSERT_WITH_CODE(false,
+ "Attempt to populate Fuzzy Fan Control parameters Failed!",
+ return -EINVAL);
+
+ if (vegam_populate_gnb_lpml(hwmgr))
+ PP_ASSERT_WITH_CODE(false,
+ "Attempt to populate GnbLPML Failed!",
+ return -EINVAL);
+
+ if (vegam_populate_bapm_vddc_base_leakage_sidd(hwmgr))
+ PP_ASSERT_WITH_CODE(false,
+ "Attempt to populate BapmVddCBaseLeakage Hi and Lo "
+ "Sidd Failed!", return -EINVAL);
+
+ if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset,
+ (uint8_t *)&smu_data->power_tune_table,
+ (sizeof(struct SMU75_Discrete_PmFuses) - PMFUSES_AVFSSIZE),
+ SMC_RAM_END))
+ PP_ASSERT_WITH_CODE(false,
+ "Attempt to download PmFuseTable Failed!",
+ return -EINVAL);
+ }
+ return 0;
+}
+
+static int vegam_enable_reconfig_cus(struct pp_hwmgr *hwmgr)
+{
+ struct amdgpu_device *adev = hwmgr->adev;
+
+ smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_EnableModeSwitchRLCNotification,
+ adev->gfx.cu_info.number);
+
+ return 0;
+}
+
+static int vegam_init_smc_table(struct pp_hwmgr *hwmgr)
+{
+ int result;
+ struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+ struct phm_ppt_v1_information *table_info =
+ (struct phm_ppt_v1_information *)(hwmgr->pptable);
+ struct SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table);
+ uint8_t i;
+ struct pp_atomctrl_gpio_pin_assignment gpio_pin;
+ struct phm_ppt_v1_gpio_table *gpio_table =
+ (struct phm_ppt_v1_gpio_table *)table_info->gpio_table;
+ pp_atomctrl_clock_dividers_vi dividers;
+
+ phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_AutomaticDCTransition);
+
+ vegam_initialize_power_tune_defaults(hwmgr);
+
+ if (SMU7_VOLTAGE_CONTROL_NONE != hw_data->voltage_control)
+ vegam_populate_smc_voltage_tables(hwmgr, table);
+
+ table->SystemFlags = 0;
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_AutomaticDCTransition))
+ table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC;
+
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_StepVddc))
+ table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC;
+
+ if (hw_data->is_memory_gddr5)
+ table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5;
+
+ if (hw_data->ulv_supported && table_info->us_ulv_voltage_offset) {
+ result = vegam_populate_ulv_state(hwmgr, table);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to initialize ULV state!", return result);
+ cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+ ixCG_ULV_PARAMETER, SMU7_CGULVPARAMETER_DFLT);
+ }
+
+ result = vegam_populate_smc_link_level(hwmgr, table);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to initialize Link Level!", return result);
+
+ result = vegam_populate_all_graphic_levels(hwmgr);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to initialize Graphics Level!", return result);
+
+ result = vegam_populate_all_memory_levels(hwmgr);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to initialize Memory Level!", return result);
+
+ result = vegam_populate_smc_acpi_level(hwmgr, table);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to initialize ACPI Level!", return result);
+
+ result = vegam_populate_smc_vce_level(hwmgr, table);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to initialize VCE Level!", return result);
+
+ result = vegam_populate_smc_samu_level(hwmgr, table);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to initialize SAMU Level!", return result);
+
+ /* Since only the initial state is completely set up at this point
+ * (the other states are just copies of the boot state) we only
+ * need to populate the ARB settings for the initial state.
+ */
+ result = vegam_program_memory_timing_parameters(hwmgr);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to Write ARB settings for the initial state.", return result);
+
+ result = vegam_populate_smc_uvd_level(hwmgr, table);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to initialize UVD Level!", return result);
+
+ result = vegam_populate_smc_boot_level(hwmgr, table);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to initialize Boot Level!", return result);
+
+ result = vegam_populate_smc_initial_state(hwmgr);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to initialize Boot State!", return result);
+
+ result = vegam_populate_bapm_parameters_in_dpm_table(hwmgr);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to populate BAPM Parameters!", return result);
+
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_ClockStretcher)) {
+ result = vegam_populate_clock_stretcher_data_table(hwmgr);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to populate Clock Stretcher Data Table!",
+ return result);
+ }
+
+ result = vegam_populate_avfs_parameters(hwmgr);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to populate AVFS Parameters!", return result;);
+
+ table->CurrSclkPllRange = 0xff;
+ table->GraphicsVoltageChangeEnable = 1;
+ table->GraphicsThermThrottleEnable = 1;
+ table->GraphicsInterval = 1;
+ table->VoltageInterval = 1;
+ table->ThermalInterval = 1;
+ table->TemperatureLimitHigh =
+ table_info->cac_dtp_table->usTargetOperatingTemp *
+ SMU7_Q88_FORMAT_CONVERSION_UNIT;
+ table->TemperatureLimitLow =
+ (table_info->cac_dtp_table->usTargetOperatingTemp - 1) *
+ SMU7_Q88_FORMAT_CONVERSION_UNIT;
+ table->MemoryVoltageChangeEnable = 1;
+ table->MemoryInterval = 1;
+ table->VoltageResponseTime = 0;
+ table->PhaseResponseTime = 0;
+ table->MemoryThermThrottleEnable = 1;
+
+ PP_ASSERT_WITH_CODE(hw_data->dpm_table.pcie_speed_table.count >= 1,
+ "There must be 1 or more PCIE levels defined in PPTable.",
+ return -EINVAL);
+ table->PCIeBootLinkLevel =
+ hw_data->dpm_table.pcie_speed_table.count;
+ table->PCIeGenInterval = 1;
+ table->VRConfig = 0;
+
+ result = vegam_populate_vr_config(hwmgr, table);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to populate VRConfig setting!", return result);
+
+ table->ThermGpio = 17;
+ table->SclkStepSize = 0x4000;
+
+ if (atomctrl_get_pp_assign_pin(hwmgr,
+ VDDC_VRHOT_GPIO_PINID, &gpio_pin)) {
+ table->VRHotGpio = gpio_pin.uc_gpio_pin_bit_shift;
+ if (gpio_table)
+ table->VRHotLevel =
+ table_info->gpio_table->vrhot_triggered_sclk_dpm_index;
+ } else {
+ table->VRHotGpio = SMU7_UNUSED_GPIO_PIN;
+ phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_RegulatorHot);
+ }
+
+ if (atomctrl_get_pp_assign_pin(hwmgr,
+ PP_AC_DC_SWITCH_GPIO_PINID, &gpio_pin)) {
+ table->AcDcGpio = gpio_pin.uc_gpio_pin_bit_shift;
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_AutomaticDCTransition) &&
+ !smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UseNewGPIOScheme))
+ phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_SMCtoPPLIBAcdcGpioScheme);
+ } else {
+ table->AcDcGpio = SMU7_UNUSED_GPIO_PIN;
+ phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_AutomaticDCTransition);
+ }
+
+ /* Thermal Output GPIO */
+ if (atomctrl_get_pp_assign_pin(hwmgr,
+ THERMAL_INT_OUTPUT_GPIO_PINID, &gpio_pin)) {
+ table->ThermOutGpio = gpio_pin.uc_gpio_pin_bit_shift;
+
+ /* For porlarity read GPIOPAD_A with assigned Gpio pin
+ * since VBIOS will program this register to set 'inactive state',
+ * driver can then determine 'active state' from this and
+ * program SMU with correct polarity
+ */
+ table->ThermOutPolarity =
+ (0 == (cgs_read_register(hwmgr->device, mmGPIOPAD_A) &
+ (1 << gpio_pin.uc_gpio_pin_bit_shift))) ? 1:0;
+ table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_ONLY;
+
+ /* if required, combine VRHot/PCC with thermal out GPIO */
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_RegulatorHot) &&
+ phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_CombinePCCWithThermalSignal))
+ table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_VRHOT;
+ } else {
+ table->ThermOutGpio = 17;
+ table->ThermOutPolarity = 1;
+ table->ThermOutMode = SMU7_THERM_OUT_MODE_DISABLE;
+ }
+
+ /* Populate BIF_SCLK levels into SMC DPM table */
+ for (i = 0; i <= hw_data->dpm_table.pcie_speed_table.count; i++) {
+ result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+ smu_data->bif_sclk_table[i], &dividers);
+ PP_ASSERT_WITH_CODE(!result,
+ "Can not find DFS divide id for Sclk",
+ return result);
+
+ if (i == 0)
+ table->Ulv.BifSclkDfs =
+ PP_HOST_TO_SMC_US((uint16_t)(dividers.pll_post_divider));
+ else
+ table->LinkLevel[i - 1].BifSclkDfs =
+ PP_HOST_TO_SMC_US((uint16_t)(dividers.pll_post_divider));
+ }
+
+ for (i = 0; i < SMU75_MAX_ENTRIES_SMIO; i++)
+ table->Smio[i] = PP_HOST_TO_SMC_UL(table->Smio[i]);
+
+ CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask1);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask2);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize);
+ CONVERT_FROM_HOST_TO_SMC_UL(table->CurrSclkPllRange);
+ CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh);
+ CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow);
+ CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime);
+ CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime);
+
+ /* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */
+ result = smu7_copy_bytes_to_smc(hwmgr,
+ smu_data->smu7_data.dpm_table_start +
+ offsetof(SMU75_Discrete_DpmTable, SystemFlags),
+ (uint8_t *)&(table->SystemFlags),
+ sizeof(SMU75_Discrete_DpmTable) - 3 * sizeof(SMU75_PIDController),
+ SMC_RAM_END);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to upload dpm data to SMC memory!", return result);
+
+ result = vegam_populate_pm_fuses(hwmgr);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to populate PM fuses to SMC memory!", return result);
+
+ result = vegam_enable_reconfig_cus(hwmgr);
+ PP_ASSERT_WITH_CODE(!result,
+ "Failed to enable reconfigurable CUs!", return result);
+
+ return 0;
+}
+
+static uint32_t vegam_get_offsetof(uint32_t type, uint32_t member)
+{
+ switch (type) {
+ case SMU_SoftRegisters:
+ switch (member) {
+ case HandshakeDisables:
+ return offsetof(SMU75_SoftRegisters, HandshakeDisables);
+ case VoltageChangeTimeout:
+ return offsetof(SMU75_SoftRegisters, VoltageChangeTimeout);
+ case AverageGraphicsActivity:
+ return offsetof(SMU75_SoftRegisters, AverageGraphicsActivity);
+ case PreVBlankGap:
+ return offsetof(SMU75_SoftRegisters, PreVBlankGap);
+ case VBlankTimeout:
+ return offsetof(SMU75_SoftRegisters, VBlankTimeout);
+ case UcodeLoadStatus:
+ return offsetof(SMU75_SoftRegisters, UcodeLoadStatus);
+ case DRAM_LOG_ADDR_H:
+ return offsetof(SMU75_SoftRegisters, DRAM_LOG_ADDR_H);
+ case DRAM_LOG_ADDR_L:
+ return offsetof(SMU75_SoftRegisters, DRAM_LOG_ADDR_L);
+ case DRAM_LOG_PHY_ADDR_H:
+ return offsetof(SMU75_SoftRegisters, DRAM_LOG_PHY_ADDR_H);
+ case DRAM_LOG_PHY_ADDR_L:
+ return offsetof(SMU75_SoftRegisters, DRAM_LOG_PHY_ADDR_L);
+ case DRAM_LOG_BUFF_SIZE:
+ return offsetof(SMU75_SoftRegisters, DRAM_LOG_BUFF_SIZE);
+ }
+ case SMU_Discrete_DpmTable:
+ switch (member) {
+ case UvdBootLevel:
+ return offsetof(SMU75_Discrete_DpmTable, UvdBootLevel);
+ case VceBootLevel:
+ return offsetof(SMU75_Discrete_DpmTable, VceBootLevel);
+ case SamuBootLevel:
+ return offsetof(SMU75_Discrete_DpmTable, SamuBootLevel);
+ case LowSclkInterruptThreshold:
+ return offsetof(SMU75_Discrete_DpmTable, LowSclkInterruptThreshold);
+ }
+ }
+ pr_warn("can't get the offset of type %x member %x\n", type, member);
+ return 0;
+}
+
+static int vegam_program_mem_timing_parameters(struct pp_hwmgr *hwmgr)
+{
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+
+ if (data->need_update_smu7_dpm_table &
+ (DPMTABLE_OD_UPDATE_SCLK +
+ DPMTABLE_UPDATE_SCLK +
+ DPMTABLE_UPDATE_MCLK))
+ return vegam_program_memory_timing_parameters(hwmgr);
+
+ return 0;
+}
+
+static int vegam_update_sclk_threshold(struct pp_hwmgr *hwmgr)
+{
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ struct vegam_smumgr *smu_data =
+ (struct vegam_smumgr *)(hwmgr->smu_backend);
+ int result = 0;
+ uint32_t low_sclk_interrupt_threshold = 0;
+
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_SclkThrottleLowNotification)
+ && (data->low_sclk_interrupt_threshold != 0)) {
+ low_sclk_interrupt_threshold =
+ data->low_sclk_interrupt_threshold;
+
+ CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold);
+
+ result = smu7_copy_bytes_to_smc(
+ hwmgr,
+ smu_data->smu7_data.dpm_table_start +
+ offsetof(SMU75_Discrete_DpmTable,
+ LowSclkInterruptThreshold),
+ (uint8_t *)&low_sclk_interrupt_threshold,
+ sizeof(uint32_t),
+ SMC_RAM_END);
+ }
+ PP_ASSERT_WITH_CODE((result == 0),
+ "Failed to update SCLK threshold!", return result);
+
+ result = vegam_program_mem_timing_parameters(hwmgr);
+ PP_ASSERT_WITH_CODE((result == 0),
+ "Failed to program memory timing parameters!",
+ );
+
+ return result;
+}
+
+int vegam_thermal_avfs_enable(struct pp_hwmgr *hwmgr)
+{
+ struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+ int ret;
+
+ if (!hwmgr->avfs_supported)
+ return 0;
+
+ ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs);
+ if (!ret) {
+ if (data->apply_avfs_cks_off_voltage)
+ ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ApplyAvfsCksOffVoltage);
+ }
+
+ return ret;
+}
+
+static int vegam_thermal_setup_fan_table(struct pp_hwmgr *hwmgr)
+{
+ PP_ASSERT_WITH_CODE(hwmgr->thermal_controller.fanInfo.bNoFan,
+ "VBIOS fan info is not correct!",
+ );
+ phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_MicrocodeFanControl);
+ return 0;
+}
+
+const struct pp_smumgr_func vegam_smu_funcs = {
+ .smu_init = vegam_smu_init,
+ .smu_fini = smu7_smu_fini,
+ .start_smu = vegam_start_smu,
+ .check_fw_load_finish = smu7_check_fw_load_finish,
+ .request_smu_load_fw = smu7_reload_firmware,
+ .request_smu_load_specific_fw = NULL,
+ .send_msg_to_smc = smu7_send_msg_to_smc,
+ .send_msg_to_smc_with_parameter = smu7_send_msg_to_smc_with_parameter,
+ .process_firmware_header = vegam_process_firmware_header,
+ .is_dpm_running = vegam_is_dpm_running,
+ .get_mac_definition = vegam_get_mac_definition,
+ .update_smc_table = vegam_update_smc_table,
+ .init_smc_table = vegam_init_smc_table,
+ .get_offsetof = vegam_get_offsetof,
+ .populate_all_graphic_levels = vegam_populate_all_graphic_levels,
+ .populate_all_memory_levels = vegam_populate_all_memory_levels,
+ .update_sclk_threshold = vegam_update_sclk_threshold,
+ .is_hw_avfs_present = vegam_is_hw_avfs_present,
+ .thermal_avfs_enable = vegam_thermal_avfs_enable,
+ .thermal_setup_fan_table = vegam_thermal_setup_fan_table,
+};
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h
new file mode 100644
index 000000000000..2b6558238500
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _VEGAM_SMUMANAGER_H
+#define _VEGAM_SMUMANAGER_H
+
+
+#include <pp_endian.h>
+#include "smu75_discrete.h"
+#include "smu7_smumgr.h"
+
+#define SMC_RAM_END 0x40000
+
+#define DPMTuning_Uphyst_Shift 0
+#define DPMTuning_Downhyst_Shift 8
+#define DPMTuning_Activity_Shift 16
+
+#define GraphicsDPMTuning_VEGAM 0x001e6400
+#define MemoryDPMTuning_VEGAM 0x000f3c0a
+#define SclkDPMTuning_VEGAM 0x002d000a
+#define MclkDPMTuning_VEGAM 0x001f100a
+
+
+struct vegam_pt_defaults {
+ uint8_t SviLoadLineEn;
+ uint8_t SviLoadLineVddC;
+ uint8_t TDC_VDDC_ThrottleReleaseLimitPerc;
+ uint8_t TDC_MAWt;
+ uint8_t TdcWaterfallCtl;
+ uint8_t DTEAmbientTempBase;
+
+ uint32_t DisplayCac;
+ uint32_t BAPM_TEMP_GRADIENT;
+ uint16_t BAPMTI_R[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS];
+ uint16_t BAPMTI_RC[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS];
+};
+
+struct vegam_range_table {
+ uint32_t trans_lower_frequency; /* in 10khz */
+ uint32_t trans_upper_frequency;
+};
+
+struct vegam_smumgr {
+ struct smu7_smumgr smu7_data;
+ uint8_t protected_mode;
+ SMU75_Discrete_DpmTable smc_state_table;
+ struct SMU75_Discrete_Ulv ulv_setting;
+ struct SMU75_Discrete_PmFuses power_tune_table;
+ struct vegam_range_table range_table[NUM_SCLK_RANGE];
+ const struct vegam_pt_defaults *power_tune_defaults;
+ uint32_t bif_sclk_table[SMU75_MAX_LEVELS_LINK];
+};
+
+
+#endif